Skip to content

Commit 0ebe375

Browse files
committed
new updates
1 parent 3f8978a commit 0ebe375

7 files changed

+1144
-1108
lines changed

Scraping Country Names using Selenium.ipynb

+20-18
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,18 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 1,
12+
"execution_count": 3,
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
1616
"from selenium import webdriver\n",
17+
"from selenium.webdriver.common.by import By\n",
1718
"import pandas as pd"
1819
]
1920
},
2021
{
2122
"cell_type": "code",
22-
"execution_count": 15,
23+
"execution_count": 4,
2324
"metadata": {},
2425
"outputs": [],
2526
"source": [
@@ -36,26 +37,27 @@
3637
},
3738
{
3839
"cell_type": "code",
39-
"execution_count": 2,
40+
"execution_count": 5,
4041
"metadata": {},
4142
"outputs": [],
4243
"source": [
43-
"path = 'C:\\\\Chromedriver.exe'"
44+
"# path = 'C:\\\\Chromedriver.exe'"
4445
]
4546
},
4647
{
4748
"cell_type": "code",
48-
"execution_count": 3,
49+
"execution_count": 6,
4950
"metadata": {},
5051
"outputs": [],
5152
"source": [
5253
"# set path for the driver\n",
53-
"browser = webdriver.Chrome(executable_path = path)"
54+
"# browser = webdriver.Chrome(executable_path = path)\n",
55+
"browser = webdriver.Chrome()"
5456
]
5557
},
5658
{
5759
"cell_type": "code",
58-
"execution_count": 4,
60+
"execution_count": 7,
5961
"metadata": {},
6062
"outputs": [],
6163
"source": [
@@ -64,7 +66,7 @@
6466
},
6567
{
6668
"cell_type": "code",
67-
"execution_count": 5,
69+
"execution_count": 8,
6870
"metadata": {},
6971
"outputs": [],
7072
"source": [
@@ -81,18 +83,18 @@
8183
},
8284
{
8385
"cell_type": "code",
84-
"execution_count": 2,
86+
"execution_count": 9,
8587
"metadata": {},
8688
"outputs": [],
8789
"source": [
8890
"# get country names\n",
89-
"country_list = browser.find_elements_by_xpath(\"//h3[@class='country-name']\")\n",
91+
"country_list = browser.find_elements(By.XPATH, \"//h3[@class='country-name']\")\n",
9092
"# country_list"
9193
]
9294
},
9395
{
9496
"cell_type": "code",
95-
"execution_count": 3,
97+
"execution_count": 12,
9698
"metadata": {},
9799
"outputs": [],
98100
"source": [
@@ -107,18 +109,18 @@
107109
},
108110
{
109111
"cell_type": "code",
110-
"execution_count": 4,
112+
"execution_count": 13,
111113
"metadata": {},
112114
"outputs": [],
113115
"source": [
114116
"# # get the population for the country\n",
115-
"population_list = browser.find_elements_by_class_name('country-population')\n",
117+
"population_list = browser.find_elements(By.CLASS_NAME,'country-population')\n",
116118
"# population_list"
117119
]
118120
},
119121
{
120122
"cell_type": "code",
121-
"execution_count": 5,
123+
"execution_count": 14,
122124
"metadata": {},
123125
"outputs": [],
124126
"source": [
@@ -140,7 +142,7 @@
140142
},
141143
{
142144
"cell_type": "code",
143-
"execution_count": 12,
145+
"execution_count": 15,
144146
"metadata": {},
145147
"outputs": [
146148
{
@@ -207,7 +209,7 @@
207209
"4 Anguilla 13254"
208210
]
209211
},
210-
"execution_count": 12,
212+
"execution_count": 15,
211213
"metadata": {},
212214
"output_type": "execute_result"
213215
}
@@ -249,7 +251,7 @@
249251
},
250252
{
251253
"cell_type": "code",
252-
"execution_count": 14,
254+
"execution_count": 16,
253255
"metadata": {},
254256
"outputs": [],
255257
"source": [
@@ -266,7 +268,7 @@
266268
],
267269
"metadata": {
268270
"kernelspec": {
269-
"display_name": "Python 3",
271+
"display_name": "Python 3 (ipykernel)",
270272
"language": "python",
271273
"name": "python3"
272274
},

Scraping IMDB Top Movies using Beautiful Soup.ipynb

+1,019-1,010
Large diffs are not rendered by default.

Scraping Multimedia Files using Beautiful Soup.ipynb

+16-16
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
},
6565
{
6666
"cell_type": "code",
67-
"execution_count": 1,
67+
"execution_count": 5,
6868
"metadata": {},
6969
"outputs": [],
7070
"source": [
@@ -74,7 +74,7 @@
7474
},
7575
{
7676
"cell_type": "code",
77-
"execution_count": 3,
77+
"execution_count": 6,
7878
"metadata": {},
7979
"outputs": [],
8080
"source": [
@@ -85,16 +85,16 @@
8585
},
8686
{
8787
"cell_type": "code",
88-
"execution_count": 10,
88+
"execution_count": 7,
8989
"metadata": {},
9090
"outputs": [
9191
{
9292
"data": {
9393
"text/plain": [
94-
"<source media=\"(min-width: 1600px)\" sizes=\"960px\" srcset=\"https://th.thgim.com/news/national/ebs7fg/article34673100.ece/ALTERNATES/FREE_960/Migrants2jpg\"/>"
94+
"<source media=\"(min-width: 1600px)\" sizes=\"960px\" srcset=\"https://th-i.thgim.com/public/news/national/2g2qwq/article53557510.ece/alternates/LANDSCAPE_1200/Migrants2jpg\"/>"
9595
]
9696
},
97-
"execution_count": 10,
97+
"execution_count": 7,
9898
"metadata": {},
9999
"output_type": "execute_result"
100100
}
@@ -107,16 +107,16 @@
107107
},
108108
{
109109
"cell_type": "code",
110-
"execution_count": 11,
110+
"execution_count": 8,
111111
"metadata": {},
112112
"outputs": [
113113
{
114114
"data": {
115115
"text/plain": [
116-
"'https://th.thgim.com/news/national/ebs7fg/article34673100.ece/ALTERNATES/FREE_960/Migrants2jpg'"
116+
"'https://th-i.thgim.com/public/news/national/2g2qwq/article53557510.ece/alternates/LANDSCAPE_1200/Migrants2jpg'"
117117
]
118118
},
119-
"execution_count": 11,
119+
"execution_count": 8,
120120
"metadata": {},
121121
"output_type": "execute_result"
122122
}
@@ -127,7 +127,7 @@
127127
},
128128
{
129129
"cell_type": "code",
130-
"execution_count": 12,
130+
"execution_count": 9,
131131
"metadata": {},
132132
"outputs": [],
133133
"source": [
@@ -143,7 +143,7 @@
143143
},
144144
{
145145
"cell_type": "code",
146-
"execution_count": 13,
146+
"execution_count": 10,
147147
"metadata": {},
148148
"outputs": [],
149149
"source": [
@@ -152,7 +152,7 @@
152152
},
153153
{
154154
"cell_type": "code",
155-
"execution_count": 14,
155+
"execution_count": 11,
156156
"metadata": {},
157157
"outputs": [],
158158
"source": [
@@ -171,7 +171,7 @@
171171
},
172172
{
173173
"cell_type": "code",
174-
"execution_count": 15,
174+
"execution_count": 12,
175175
"metadata": {},
176176
"outputs": [],
177177
"source": [
@@ -180,7 +180,7 @@
180180
},
181181
{
182182
"cell_type": "code",
183-
"execution_count": 16,
183+
"execution_count": 13,
184184
"metadata": {},
185185
"outputs": [],
186186
"source": [
@@ -198,7 +198,7 @@
198198
},
199199
{
200200
"cell_type": "code",
201-
"execution_count": 17,
201+
"execution_count": 14,
202202
"metadata": {},
203203
"outputs": [],
204204
"source": [
@@ -207,7 +207,7 @@
207207
},
208208
{
209209
"cell_type": "code",
210-
"execution_count": 19,
210+
"execution_count": 15,
211211
"metadata": {},
212212
"outputs": [],
213213
"source": [
@@ -226,7 +226,7 @@
226226
],
227227
"metadata": {
228228
"kernelspec": {
229-
"display_name": "Python 3",
229+
"display_name": "Python 3 (ipykernel)",
230230
"language": "python",
231231
"name": "python3"
232232
},

Scraping Products from Amazon using Selenium-Dynamic Website.ipynb

+21-19
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"outputs": [],
1515
"source": [
1616
"from selenium import webdriver\n",
17+
"from selenium.webdriver.common.by import By\n",
1718
"from time import sleep"
1819
]
1920
},
@@ -30,7 +31,7 @@
3031
"metadata": {},
3132
"outputs": [],
3233
"source": [
33-
"path = 'C://chromedriver.exe'"
34+
"# path = 'C://chromedriver.exe'"
3435
]
3536
},
3637
{
@@ -40,7 +41,8 @@
4041
"outputs": [],
4142
"source": [
4243
"# open the browser\n",
43-
"browser = webdriver.Chrome(executable_path=path)"
44+
"# browser = webdriver.Chrome(executable_path = path)\n",
45+
"browser = webdriver.Chrome()"
4446
]
4547
},
4648
{
@@ -69,8 +71,8 @@
6971
"outputs": [],
7072
"source": [
7173
"# get the input elements\n",
72-
"input_search = browser.find_element_by_id('twotabsearchtextbox')\n",
73-
"search_button = browser.find_element_by_xpath(\"(//input[@type='submit'])[1]\")"
74+
"input_search = browser.find_element(By.ID, 'twotabsearchtextbox')\n",
75+
"search_button = browser.find_element(By.XPATH, \"(//input[@type='submit'])[1]\")"
7476
]
7577
},
7678
{
@@ -94,7 +96,7 @@
9496
},
9597
{
9698
"cell_type": "code",
97-
"execution_count": 11,
99+
"execution_count": 8,
98100
"metadata": {},
99101
"outputs": [
100102
{
@@ -118,26 +120,26 @@
118120
"products = []\n",
119121
"for i in range(10):\n",
120122
" print('Scraping page', i+1)\n",
121-
" product = browser.find_elements_by_xpath(\"//span[@class='a-size-medium a-color-base a-text-normal']\")\n",
123+
" product = browser.find_elements(By.XPATH, \"//span[@class='a-size-medium a-color-base a-text-normal']\")\n",
122124
" for p in product:\n",
123125
" products.append(p.text)\n",
124-
" next_button = browser.find_element_by_xpath(\"//a[text()='Next']\")\n",
126+
" next_button = browser.find_element(By.XPATH, \"//a[text()='Next']\")\n",
125127
" next_button.click()\n",
126128
" sleep(2)"
127129
]
128130
},
129131
{
130132
"cell_type": "code",
131-
"execution_count": 12,
133+
"execution_count": 9,
132134
"metadata": {},
133135
"outputs": [
134136
{
135137
"data": {
136138
"text/plain": [
137-
"165"
139+
"186"
138140
]
139141
},
140-
"execution_count": 12,
142+
"execution_count": 9,
141143
"metadata": {},
142144
"output_type": "execute_result"
143145
}
@@ -148,20 +150,20 @@
148150
},
149151
{
150152
"cell_type": "code",
151-
"execution_count": 13,
153+
"execution_count": 10,
152154
"metadata": {},
153155
"outputs": [
154156
{
155157
"data": {
156158
"text/plain": [
157-
"['Redmi 10A (Slate Grey, 4GB RAM, 64GB Storage) | 2 Ghz Octa Core Helio G25 | 5000 mAh Battery | Finger Print Sensor | Upto 5GB RAM with RAM Booster',\n",
158-
" 'Redmi 10 Power (Power Black, 8GB RAM, 128GB Storage)',\n",
159-
" 'Redmi A1 (Light Blue, 2GB RAM, 32GB Storage) | Segment Best AI Dual Cam | 5000mAh Battery | Leather Texture Design | Android 12',\n",
160-
" 'Redmi 9A Sport (Coral Green, 2GB RAM, 32GB Storage) | 2GHz Octa-core Helio G25 Processor | 5000 mAh Battery',\n",
161-
" 'Redmi 10A (Slate Grey, 4GB RAM, 64GB Storage) | 2 Ghz Octa Core Helio G25 | 5000 mAh Battery | Finger Print Sensor | Upto 5GB RAM with RAM Booster']"
159+
"['Lava Blaze 2 (6GB RAM, 128GB Storage) - Glass Blue | 18W Fast Charging | 6.5 inch 90Hz Punch Hole Display | Side Fingerprint Sensor | Upto 11GB Expandable RAM | 5000 mAh Battery',\n",
160+
" 'Lava Yuva 2 Pro (Glass Lavender, 4GB RAM, 64GB Storage)| 2.3 Ghz Octa Core Helio G37| 13 MP AI Triple Camera |Fingerprint Sensor| 5000 mAh Battery| Upto 7GB Expandable RAM',\n",
161+
" 'realme narzo N53 (Feather Black, 4GB+64GB) 33W Segment Fastest Charging | Slimmest Phone in Segment | 90 Hz Smooth Display',\n",
162+
" 'realme narzo 50i Prime (Dark Blue 4GB RAM+64GB Storage) Octa-core Processor | 5000 mAh Battery',\n",
163+
" 'Redmi A2 (Aqua Blue, 2GB RAM, 32GB Storage) | Powerful Octa Core G36 Processor | Upto 7GB RAM | Large 16.5 cm HD+ Display with Massive 5000mAh Battery | 2 Years Warranty [Limited time Offer]']"
162164
]
163165
},
164-
"execution_count": 13,
166+
"execution_count": 10,
165167
"metadata": {},
166168
"output_type": "execute_result"
167169
}
@@ -172,7 +174,7 @@
172174
},
173175
{
174176
"cell_type": "code",
175-
"execution_count": 15,
177+
"execution_count": 11,
176178
"metadata": {},
177179
"outputs": [],
178180
"source": [
@@ -189,7 +191,7 @@
189191
],
190192
"metadata": {
191193
"kernelspec": {
192-
"display_name": "Python 3",
194+
"display_name": "Python 3 (ipykernel)",
193195
"language": "python",
194196
"name": "python3"
195197
},

0 commit comments

Comments
 (0)