Skip to content

Commit 6e435de

Browse files
Code updated for lesson 06-08
1 parent d1fa038 commit 6e435de

12 files changed

+2243
-1759
lines changed

Diff for: Lesson06/Activity01.ipynb

+40-30
Large diffs are not rendered by default.

Diff for: Lesson06/Exercise01.ipynb

+76-107
Large diffs are not rendered by default.

Diff for: Lesson06/Exercise02.ipynb

+476-42
Large diffs are not rendered by default.

Diff for: Lesson07/Activity01.ipynb

+1,327
Large diffs are not rendered by default.

Diff for: Lesson07/Activity1.ipynb

-1,316
This file was deleted.

Diff for: Lesson07/Exercise1.ipynb renamed to Lesson07/Exercise01.ipynb

+135-125
Large diffs are not rendered by default.

Diff for: Lesson07/Exercise2.ipynb renamed to Lesson07/Exercise02.ipynb

+50-40
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,22 @@
1414
},
1515
"cells": [
1616
{
17+
"cell_type": "markdown",
18+
"metadata": {
19+
"id": "OyyWh-4jHlcI",
20+
"colab_type": "text"
21+
},
22+
"source": [
23+
"## Exercise 2: Missing Value Preprocessing with High Reproducibility"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
1728
"metadata": {
1829
"id": "E0PhwttvxbGz",
1930
"colab_type": "code",
2031
"colab": {}
2132
},
22-
"cell_type": "code",
2333
"source": [
2434
"import numpy as np\n",
2535
"import pandas as pd"
@@ -28,33 +38,33 @@
2838
"outputs": []
2939
},
3040
{
41+
"cell_type": "code",
3142
"metadata": {
3243
"id": "Cq_pPqioxdTv",
3344
"colab_type": "code",
3445
"colab": {}
3546
},
36-
"cell_type": "code",
3747
"source": [
3848
"df = pd.read_csv('https://raw.githubusercontent.com/TrainingByPackt/Big-Data-Analysis-with-Python/master/Lesson07/Dataset/bank.csv', sep=';')"
3949
],
4050
"execution_count": 0,
4151
"outputs": []
4252
},
4353
{
54+
"cell_type": "code",
4455
"metadata": {
4556
"id": "kO8KhEfmxpgm",
4657
"colab_type": "code",
58+
"outputId": "f5026961-ddf5-416f-9401-fbd377720950",
4759
"colab": {
4860
"base_uri": "https://localhost:8080/",
4961
"height": 204
50-
},
51-
"outputId": "b373d2f4-35b8-48b0-c25d-257297f5483d"
62+
}
5263
},
53-
"cell_type": "code",
5464
"source": [
5565
"df.head(5)"
5666
],
57-
"execution_count": 6,
67+
"execution_count": 3,
5868
"outputs": [
5969
{
6070
"output_type": "execute_result",
@@ -221,17 +231,17 @@
221231
"metadata": {
222232
"tags": []
223233
},
224-
"execution_count": 6
234+
"execution_count": 3
225235
}
226236
]
227237
},
228238
{
239+
"cell_type": "code",
229240
"metadata": {
230241
"id": "rQhGcWosxrkX",
231242
"colab_type": "code",
232243
"colab": {}
233244
},
234-
"cell_type": "code",
235245
"source": [
236246
"import collections\n",
237247
"import random\n",
@@ -244,12 +254,12 @@
244254
"outputs": []
245255
},
246256
{
257+
"cell_type": "code",
247258
"metadata": {
248259
"id": "Q2rSdiA80Gp_",
249260
"colab_type": "code",
250261
"colab": {}
251262
},
252-
"cell_type": "code",
253263
"source": [
254264
"for row, col in ix:\n",
255265
" if len(replaced[row]) < df.shape[1] - 1:\n",
@@ -263,58 +273,58 @@
263273
"outputs": []
264274
},
265275
{
276+
"cell_type": "code",
266277
"metadata": {
267278
"id": "xWHplf2_0IMe",
268279
"colab_type": "code",
280+
"outputId": "c8dc8b1b-eedf-4727-c0b2-c4d705719fe3",
269281
"colab": {
270282
"base_uri": "https://localhost:8080/",
271283
"height": 323
272-
},
273-
"outputId": "0f5f40f8-8195-40c2-8f4d-eba87d9db113"
284+
}
274285
},
275-
"cell_type": "code",
276286
"source": [
277287
"print(df.isna().sum())\n"
278288
],
279-
"execution_count": 12,
289+
"execution_count": 6,
280290
"outputs": [
281291
{
282292
"output_type": "stream",
283293
"text": [
284-
"age 461\n",
285-
"job 470\n",
286-
"marital 462\n",
287-
"education 462\n",
288-
"default 486\n",
289-
"balance 460\n",
290-
"housing 475\n",
291-
"loan 444\n",
292-
"contact 443\n",
293-
"day 438\n",
294-
"month 414\n",
295-
"duration 446\n",
296-
"campaign 496\n",
297-
"pdays 403\n",
298-
"previous 429\n",
299-
"poutcome 443\n",
300-
"y 454\n",
294+
"age 459\n",
295+
"job 503\n",
296+
"marital 487\n",
297+
"education 477\n",
298+
"default 458\n",
299+
"balance 425\n",
300+
"housing 442\n",
301+
"loan 420\n",
302+
"contact 479\n",
303+
"day 429\n",
304+
"month 420\n",
305+
"duration 461\n",
306+
"campaign 478\n",
307+
"pdays 444\n",
308+
"previous 400\n",
309+
"poutcome 453\n",
310+
"y 451\n",
301311
"dtype: int64\n"
302312
],
303313
"name": "stdout"
304314
}
305315
]
306316
},
307317
{
318+
"cell_type": "code",
308319
"metadata": {
309320
"id": "gy3hrHu40J__",
310321
"colab_type": "code",
322+
"outputId": "0f295ef2-74ac-40c8-cf5f-f01bdbbd9ed0",
311323
"colab": {
312324
"base_uri": "https://localhost:8080/",
313325
"height": 2193
314-
},
315-
"outputId": "69f35783-00ff-4a34-aa34-8bd91301bcfd"
326+
}
316327
},
317-
"cell_type": "code",
318328
"source": [
319329
"num = df._get_numeric_data()\n",
320330
"Q1 = num.quantile(0.25)\n",
@@ -323,7 +333,7 @@
323333
"print(num < (Q1 - 1.5 * IQR))\n",
324334
"print(num > (Q3 + 1.5 * IQR))\n"
325335
],
326-
"execution_count": 13,
336+
"execution_count": 7,
327337
"outputs": [
328338
{
329339
"output_type": "stream",
@@ -395,19 +405,19 @@
395405
" age balance day duration campaign pdays previous\n",
396406
"0 False False False False False False False\n",
397407
"1 False True False False False True True\n",
398-
"2 False False False False False True False\n",
408+
"2 False False False False False True True\n",
399409
"3 False False False False False False False\n",
400410
"4 False False False False False False False\n",
401411
"5 False False False False False True True\n",
402-
"6 False False False False False True True\n",
412+
"6 False False False False False False True\n",
403413
"7 False False False False False False False\n",
404414
"8 False False False False False False False\n",
405415
"9 False False False False False True True\n",
406416
"10 False True False False False False False\n",
407417
"11 False False False False False False False\n",
408418
"12 False False False False False False False\n",
409419
"13 False False False False False False False\n",
410-
"14 False False False False False True True\n",
420+
"14 False False False False False True False\n",
411421
"15 False False False False False False False\n",
412422
"16 False True False False False False False\n",
413423
"17 False False False False False True True\n",
@@ -428,7 +438,7 @@
428438
"4492 False False False False False True True\n",
429439
"4493 False False False False False False False\n",
430440
"4494 False False False False False False False\n",
431-
"4495 False False False False False False False\n",
441+
"4495 False False False True False False False\n",
432442
"4496 False False False False False False False\n",
433443
"4497 False False False False False False False\n",
434444
"4498 False False False True False False False\n",
@@ -452,7 +462,7 @@
452462
"4516 False False False False False False False\n",
453463
"4517 False False False False False False False\n",
454464
"4518 False False False False True False False\n",
455-
"4519 False False False False False True True\n",
465+
"4519 False False False False False False True\n",
456466
"4520 False False False False False True True\n",
457467
"\n",
458468
"[4521 rows x 7 columns]\n"
@@ -462,4 +472,4 @@
462472
]
463473
}
464474
]
465-
}
475+
}

0 commit comments

Comments
 (0)