-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape_subjects.py
86 lines (69 loc) · 2.9 KB
/
scrape_subjects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Copyright 2024 Angie Tran, Diego Cid
#
# This file is part of Beach Buddy.
# Beach Buddy is free software: you can redistribute it and/or modify
# it under the terms of the MIT License as published by
# the Free Software Foundation, either version 1 of the License, or
# (at your option) any later version.
#
# Beach Buddy is distributed in the hope that it will be useful,
# but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# See the MIT License for more details.
#
# You should have received a copy of the MIT License
# along with Beach Buddy. If not, see <https://mit-license.org/>.
from html_scraper import *
import time
def _scrape_term(subjects_file, csv_file, url_prefix):
    """Scrape every subject listed in *subjects_file* into ``seasons/<csv_file>``.

    Shared implementation behind ``scrape_fall``, ``scrape_summer`` and
    ``scrape_spring``; the terms differ only in output file and URL prefix.

    Args:
        subjects_file: Path to a text file with one subject per line, fields
            separated by ``", "``. The second field is the course
            abbreviation that names the subject's schedule page.
        csv_file: Name of the output CSV inside the ``seasons/`` directory.
        url_prefix: Schedule URL up to and including ``By_Subject/``.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    file_path = f"seasons/{csv_file}"
    # Deletes the csv to create a new one to append to
    delete_csv_file(file_path)
    with open(subjects_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a stray empty line at the end of the
                # file) carry no subject; skip them instead of crashing.
                continue
            data = stripped.split(', ')
            if len(data) < 2:
                # Same exception type the bare data[1] lookup would raise,
                # but pointing at the offending line.
                raise IndexError(
                    f"{subjects_file}:{line_number}: expected at least two "
                    f"', '-separated fields, got {stripped!r}"
                )
            # retrieve course abbreviations
            course_abr = data[1]
            # link to request html
            subject_html = f"{url_prefix}{course_abr}.html"
            # presumably fetches the page and appends its rows to the CSV;
            # verify against html_scraper.write_data_to_file
            write_data_to_file(file_path, subject_html)


# NOTE(review): the fall and summer URLs use http:// while spring uses
# https://. The schemes are kept exactly as they were; confirm whether the
# older terms should move to https as well.
def scrape_fall(subjects_file):
    """Scrape the Fall 2024 schedule of every subject into ``seasons/fall_2024.csv``.

    Args:
        subjects_file: Path to the subjects list (see ``_scrape_term``).
    """
    _scrape_term(
        subjects_file,
        "fall_2024.csv",
        "http://web.csulb.edu/depts/enrollment/registration/class_schedule/Fall_2024/By_Subject/",
    )


def scrape_summer(subjects_file):
    """Scrape the Summer 2024 schedule of every subject into ``seasons/summer_2024.csv``.

    Args:
        subjects_file: Path to the subjects list (see ``_scrape_term``).
    """
    _scrape_term(
        subjects_file,
        "summer_2024.csv",
        "http://web.csulb.edu/depts/enrollment/registration/class_schedule/Summer_2024/By_Subject/",
    )


def scrape_spring(subjects_file):
    """Scrape the Spring 2025 schedule of every subject into ``seasons/spring_2025.csv``.

    Args:
        subjects_file: Path to the subjects list (see ``_scrape_term``).
    """
    _scrape_term(
        subjects_file,
        "spring_2025.csv",
        "https://web.csulb.edu/depts/enrollment/registration/class_schedule/Spring_2025/By_Subject/",
    )
def main():
    """Run the fall, summer and spring scrapers, in that order, on ``subjects.csv``."""
    subjects_file = "subjects.csv"
    for scrape_term in (scrape_fall, scrape_summer, scrape_spring):
        scrape_term(subjects_file)
if __name__ == "__main__":
    # Time the whole run with perf_counter: unlike time.time() it is not
    # affected by system clock adjustments, so the elapsed value cannot
    # jump or go negative during a long scrape.
    start_time = time.perf_counter()
    main()
    print("--- %s seconds ---" % (time.perf_counter() - start_time))