-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscheduler.py
93 lines (71 loc) · 3.14 KB
/
scheduler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from datetime import timedelta
import json
from playwright import sync_playwright
from dateutil.parser import parse
def parse_dates(dia: str, start_hour: str, end_hour: str):
    """Build the start and end datetimes of a talk from a day and two clock times.

    Args:
        dia: Calendar day written day-first, e.g. ``'16/11/2020'``.
        start_hour: Start time of day, e.g. ``'19:30'``.
        end_hour: End time of day, e.g. ``'20:00'``.

    Returns:
        A ``(start_datetime, end_datetime)`` tuple; both fall on ``dia``.
    """
    # The day is written DD/MM/YYYY. Without dayfirst=True dateutil reads an
    # ambiguous string such as '05/11/2020' month-first (May 11th).
    day = parse(dia, dayfirst=True)
    # Only the hour and minute of these are used; the date part that dateutil
    # fills in for a bare time is ignored.
    start = parse(start_hour)
    end = parse(end_hour)
    # e.g. 16/11/2020 + 19h + 30m
    start_datetime = day + timedelta(hours=start.hour, minutes=start.minute)
    # e.g. 16/11/2020 + 20h + 0m
    end_datetime = day + timedelta(hours=end.hour, minutes=end.minute)
    return start_datetime, end_datetime
def get_talks_schedule():
    """Scrape the PyCon Argentina 2020 schedule page and return its talks.

    Prints each day header and each talk title to stdout while scraping.

    Returns:
        A list with one dict per talk, holding the keys ``'day'`` (the date
        text taken from the day header), ``'titulo'`` (the talk title),
        ``'start'`` and ``'end'`` (ISO 8601 datetime strings).
    """
    charlas = []
    with sync_playwright() as p:
        browser = p.chromium.launch()
        # NOTE(review): the camelCase calls below follow the playwright API
        # this file imports; newer playwright-python releases expose
        # snake_case names instead -- confirm against the pinned version.
        try:
            page = browser.newPage()
            # Visit the schedule page.
            page.goto('https://eventos.python.org.ar/events/pyconar2020/schedule')
            # Each matched div is treated as one day's schedule.
            days = page.querySelectorAll('css=div .well')
            for day in days:
                # Day header text, e.g. "Día 1 - 16/11/2020".
                dia = day.querySelector('h2').innerText().strip()
                print(dia)
                dia = dia.split('- ')[1]  # Día 1 - 16/11/2020 --> 16/11/2020
                # Get all talks from that day.
                charlas_divs = day.querySelectorAll('css=div .fc-content')
                for charla in charlas_divs:
                    # Expected HTML inside each talk div:
                    #   <div class="fc-time" data-start="19:00"
                    #        data-full="7:00 PM - 8:00 PM">
                    #     <span>19:00 - 20:00</span>
                    #   </div>
                    #   <div class="fc-title">¿Por que no se hacen mas tests?</div>
                    horario = charla.querySelector('.fc-time').innerText()
                    start, end = horario.split(' - ')
                    start_date, end_date = parse_dates(dia, start, end)
                    titulo = charla.querySelector('.fc-title').innerText()
                    print('Charla ', titulo)
                    charlas.append({
                        'day': dia,
                        'titulo': titulo,
                        'start': start_date.isoformat(),
                        'end': end_date.isoformat(),
                    })
        finally:
            # Close the browser even when a selector or parse step fails.
            browser.close()
    return charlas
def dump_talks_with_schedule_info(schedule):
    """Attach schedule data to the 2020 talks and write them sorted by start.

    Reads ``pyconar/data/charlas.json``, adds a ``'schedule'`` dict to every
    entry under its ``'2020'`` key (matched by talk title), and writes the
    resulting list, ordered by start time, to
    ``pyconar/data/2020_schedule.json``.

    Args:
        schedule: Iterable of dicts with the keys ``'titulo'``, ``'day'``,
            ``'start'`` and ``'end'``.

    Raises:
        KeyError: A 2020 talk has no entry with the same title in ``schedule``.
        AssertionError: The number of talks and schedule entries differ.
    """
    # Explicit UTF-8: the output is written with ensure_ascii=False, so the
    # platform default encoding must not be relied on.
    with open('pyconar/data/charlas.json', 'r', encoding='utf-8') as f:
        all_charlas = json.load(f)

    title_to_charla = {charla['titulo']: charla for charla in schedule}
    talks = all_charlas['2020']
    for charla in talks:
        # Add schedule info, looked up by exact title match.
        name = charla['title']
        try:
            schedule_of_charla = title_to_charla[name]
        except KeyError:
            raise KeyError(f'No schedule entry found for talk {name!r}') from None
        charla['schedule'] = {
            'time': schedule_of_charla['day'],
            'start': schedule_of_charla['start'],
            'end': schedule_of_charla['end'],
        }

    # ISO 8601 strings of the same shape sort chronologically as plain text.
    next_talks = sorted(talks, key=lambda talk: talk['schedule']['start'])
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if len(next_talks) != len(schedule):
        raise AssertionError('Not all charlas were scheduled')

    with open('pyconar/data/2020_schedule.json', 'w', encoding='utf-8') as f:
        json.dump(next_talks, f, indent=4, ensure_ascii=False)
# Script entry: scrape the online schedule, then merge it into the local talk
# data and write the sorted result.
# NOTE(review): these statements run at import time as well as when the file
# is executed directly -- confirm nothing imports this module.
charlas_schedule = get_talks_schedule()
dump_talks_with_schedule_info(schedule=charlas_schedule)