-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataFundaNoSQL-Exercises.json
389 lines (362 loc) · 9.31 KB
/
DataFundaNoSQL-Exercises.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
#
# Final Exam 2023 - Elasticsearch
#
### A - Movies Index ###
# A-1
# "Quentin Tarantino" movies with a rating >= 5 that are neither action movies nor dramas
# Hint: a bool query for an exact search can be used
# 2
# TODO: the request body below is still empty, so none of the A-1 constraints
# (director, rating, excluded genres) are applied by this request yet.
GET /movies2/_search
{
}
# A-2
# "J.J. Abrams" movies released between 2010 and 2015 with a better score for stories about "crash train"
# Hint: use a bool query with a range filter
# 3
# TODO: the request body below is still empty, so none of the A-2 constraints
# (director, release-date range, "crash train" boost) are applied by this request yet.
GET /movies2/_search
{
}
### B - Flight Records ###
# B-01
# Import the flights records in a "flights" index
# Hints: 1°) delete the index "flights" if already existing (optional)
# 2°) create the index "flights" with the corresponding mapping
# 3°) load the flight records via a _bulk import command
#
# B-02
# Analyse the general form and fields of a flight record
#
# Fetch two sample documents to inspect the shape of a flight record.
# The bool query holds a single "should" clause on Cancelled = true.
GET /flights/_search?pretty
{
  "size": 2,
  "query": {
    "bool": {
      "should": {
        "match": { "Cancelled": true }
      }
    }
  }
}
# B-03
# Percentage of delayed flights in october 2023
# Hints: 1°) Calculate the number of flights during october 2023
# with a range query with upper and lower bounds combined with an aggregation over the whole results
# 2°) Calculate the number of delayed flights during October 2023 => find a criterion that indicates a delayed flight
# 714/2881
# One request, two counters over the flights whose timestamp falls in October 2023:
#   "total_bucket"          -> every flight selected by the query
#   "delayed-flight-bucket" -> the subset with FlightDelay = true
# The percentage is delayed-flight-bucket.doc_count / total_bucket.doc_count.
GET /flights/_search
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "timestamp": {
              "gte": "2023-10-01",
              "lt": "2023-11-01",
              "format": "strict_date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "total_bucket": {
      "filter": { "bool": { "must": [] } }
    },
    "delayed-flight-bucket": {
      "filter": {
        "bool": {
          "filter": [
            {
              "bool": {
                "should": [
                  { "match": { "FlightDelay": true } }
                ],
                "minimum_should_match": 1
              }
            }
          ]
        }
      }
    }
  }
}
# B-04
# Flights of october 2023
# 2881 (all) / 2521 (excluding cancelled flights)
#
# Flights of October 2023.
#   "total"         -> every flight whose timestamp is in [2023-10-01, 2023-11-01)
#   "not_cancelled" -> the subset whose Cancelled flag is false
# The second bucket was previously labelled "canceled" although its filter keeps
# the flights that were NOT cancelled; the name now matches what is counted.
GET /flights/_search?pretty
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "timestamp": {
              "gte": "2023-10-01",
              "lt": "2023-11-01"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "total": {
      "filter": { "bool": { "must": [] } }
    },
    "not_cancelled": {
      "filter": {
        "bool": {
          "filter": [
            {
              "bool": {
                "should": [
                  { "match": { "Cancelled": false } }
                ]
              }
            }
          ]
        }
      }
    }
  }
}
# B-05
# Delayed flights in October 2023
# 714
# Restrict the search to timestamps in [2023-10-01, 2023-11-01) and count,
# in the "delayed" filter bucket, the flights with FlightDelay = true.
GET /flights/_search?pretty
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "timestamp": {
              "gte": "2023-10-01",
              "lt": "2023-11-01"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "delayed": {
      "filter": {
        "bool": {
          "filter": [
            {
              "bool": {
                "should": [
                  { "match": { "FlightDelay": true } }
                ]
              }
            }
          ]
        }
      }
    }
  }
}
# B-06
# Top 3 cities with highest cancelled flights
# London(63)|Rome(59)|Quito(50)
#
# Keep only cancelled flights, then rank origin cities by number of matching
# documents and return the 3 largest buckets.
# The city aggregation is at the top level: nesting it under a country terms
# aggregation would only rank cities inside the top 3 countries, not the
# top 3 cities overall.
GET /flights/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        { "match": { "Cancelled": true } }
      ]
    }
  },
  "aggs": {
    "cities_cancel": {
      "terms": { "field": "OriginCityName", "size": 3 }
    }
  }
}
# B-07
# Top 3 countries with their top 3 cities with highest number of cancelled flights
# For each country: display the min & max flight delays in minutes
# For each city, display the average flight delay in minutes
# IT(316-0-360)/US(255-0-360)/JP(118-0-345)
#
# Cancelled flights only, bucketed by origin country (top 3), then by origin
# city inside each country (top 3).
#   per country: "min_flight" / "max_flight" -> min and max of FlightDelayMin
#   per city:    "average_flights"           -> average of FlightDelayMin
# The per-city average is computed on FlightDelayMin (the delay requested by
# the exercise), not on FlightTimeMin (the flight duration).
GET /flights/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        { "match": { "Cancelled": true } }
      ]
    }
  },
  "aggs": {
    "Countries_cancel": {
      "terms": { "field": "OriginCountry", "size": 3 },
      "aggs": {
        "cities_cancel": {
          "terms": { "field": "OriginCityName", "size": 3 },
          "aggs": {
            "average_flights": {
              "avg": { "field": "FlightDelayMin" }
            }
          }
        },
        "min_flight": {
          "min": { "field": "FlightDelayMin" }
        },
        "max_flight": {
          "max": { "field": "FlightDelayMin" }
        }
      }
    }
  }
}
# B-08
# Interpretation: the buckets are returned in descending order of document count, i.e. from the most to the least frequent weather term
# Distribution of terms within the "weather" field
# Hints: 1°) use an aggregate query
# 2°) aggregate over the "weather" terms
# "Clear", "Cloudy", "Rain", "Sunny", "Thunder & Lightning", "Hail", "Damaging Wind", "Heavy Fog",
# Distribution of the weather terms: one bucket per distinct OriginWeather value,
# returned under the "climas" aggregation; no hits are fetched (size 0).
GET /flights/_search?pretty
{
  "size": 0,
  "aggs": {
    "climas": {
      "terms": { "field": "OriginWeather" }
    }
  }
}
# B-09
# Select the flights (except cancelled) departing from Italy with "Rain" or "Thunder & Lightning" weather conditions at their departure and "Sunny" upon their arrival
# 56
#
# Non-cancelled flights leaving Italy with "Rain" or "Thunder & Lightning" at
# departure and "Sunny" weather on arrival.
#   must     -> origin weather is one of the two values AND destination is Sunny
#   filter   -> OriginCountry must be IT (a "should" next to "must" is optional
#               and would only boost Italian flights instead of selecting them)
#   must_not -> drop flights with Cancelled = true (excluding Cancelled = false
#               would keep only the cancelled ones, the opposite of the task)
# The "results" bucket counts every document kept by the query.
GET /flights/_search?pretty
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              { "match": { "OriginWeather": "Rain" } },
              { "match": { "OriginWeather": "Thunder & Lightning" } }
            ]
          }
        },
        { "match": { "DestWeather": "Sunny" } }
      ],
      "filter": [
        { "match": { "OriginCountry": "IT" } }
      ],
      "must_not": [
        { "match": { "Cancelled": true } }
      ]
    }
  },
  "aggs": {
    "results": {
      "filter": { "bool": { "must": [] } }
    }
  }
}
# B-10
# Explain the main difference between the scores of the first result vs. the last result of the previous query
# Propose 2 analysis queries and explain the main difference between the result of their ranking in the previous query
#
# Explain the score of the first flight returned by the previous query:
# Hint: 1°) use an explain query
# 2°) identify the document under analysis by its _id
# Score explanation for document He6Xj4sBj2TkY93KWdZU against the B-09 style query.
# Uses the typeless explain endpoint /<index>/_explain/<id>; the typed form
# /<index>/_doc/<id>/_explain is not available on Elasticsearch 8.x.
# NOTE(review): the must_not clause excludes flights whose Cancelled is false,
# so only cancelled flights can match, which looks inverted relative to the
# B-09 wording "(except cancelled)" - confirm; if the clause is corrected,
# pick an _id from the new result set.
GET /flights/_explain/He6Xj4sBj2TkY93KWdZU
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              { "match": { "OriginWeather": "Rain" } },
              { "match": { "OriginWeather": "Thunder & Lightning" } }
            ]
          }
        },
        { "match": { "DestWeather": "Sunny" } }
      ],
      "should": [
        { "match": { "OriginCountry": "IT" } }
      ],
      "must_not": [
        { "match": { "Cancelled": false } }
      ]
    }
  }
}
# Explain the score of the last flight returned by the previous query:
# Hint: 1°) use an explain query
# 2°) identify the document under analysis by its _id
# Score explanation for document Me6Xj4sBj2TkY93KWdZV against the B-09 style query.
# Uses the typeless explain endpoint /<index>/_explain/<id>; the typed form
# /<index>/_doc/<id>/_explain is not available on Elasticsearch 8.x.
# NOTE(review): the must_not clause excludes flights whose Cancelled is false,
# so only cancelled flights can match, which looks inverted relative to the
# B-09 wording "(except cancelled)" - confirm; if the clause is corrected,
# pick an _id from the new result set.
GET /flights/_explain/Me6Xj4sBj2TkY93KWdZV
{
  "query": {
    "bool": {
      "must": [
        {
          "bool": {
            "should": [
              { "match": { "OriginWeather": "Rain" } },
              { "match": { "OriginWeather": "Thunder & Lightning" } }
            ]
          }
        },
        { "match": { "DestWeather": "Sunny" } }
      ],
      "should": [
        { "match": { "OriginCountry": "IT" } }
      ],
      "must_not": [
        { "match": { "Cancelled": false } }
      ]
    }
  }
}
# B-11
# Count the flights within the following categories:
# - no delay
# - less than 45 minutes
# - 45 < delay <= 2 hours
# - > 2 hours
# Hint: Look for range aggregation: https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-bucket-range-aggregation.html
# 9779|413|647|2220
#
# Bucket all flights by FlightDelayMin with a range aggregation
# ("from" is inclusive, "to" is exclusive):
#   "No Delay"     -> below 1
#   "little Delay" -> 1 up to, but not including, 46
#   "medium Delay" -> 46 up to, but not including, 121
#   "high Delay"   -> 121 and above
GET /flights/_search?pretty
{
  "size": 0,
  "aggs": {
    "big_delays": {
      "range": {
        "field": "FlightDelayMin",
        "ranges": [
          { "key": "No Delay", "to": 1 },
          { "key": "little Delay", "from": 1, "to": 46 },
          { "key": "medium Delay", "from": 46, "to": 121 },
          { "key": "high Delay", "from": 121 }
        ]
      }
    }
  }
}
### C - Dashboard in Kibana ###
# C-01
# Create a map in Kibana, displaying the weight in minutes of the cumulative delay per airport worldwide.
# Hints: 1°) Use the geospatial coordinates to both aggregate the cumulative delays per airport
# and also display each airport on the worldwide map in Kibana
# 2°) Assign a proper color scale
#
# ====> /!\ Send a screenshot with Kansas City|Chicago|Detroit on the same map (Kansas city on the left of the map and Detroit on the right and Chicago in the middle) <====
#