-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathream.py
139 lines (111 loc) · 3.64 KB
/
ream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""Exports important chats from your Telegram account.
Takes no arguments, since all configuration is provided through ream.toml.
"""
import asyncio
import json
import logging
import tomllib
from contextlib import suppress
from pathlib import Path
import telethon
from telethon.hints import EntityLike
from serialization.serialization import serialize
log = logging.getLogger(__name__)
async def _flush_batch(
    batch: list,
    chat_data: dict,
    path: Path,
    export_json: Path,
) -> None:
    """Serialize a batch of messages and persist the updated export.

    The serialized messages are appended to ``chat_data["messages"]`` and
    the whole ``chat_data`` dict is rewritten to ``export_json``, so progress
    is saved to disk after every batch.
    """
    chat_data["messages"] += await asyncio.gather(
        *[serialize(message, path) for message in batch],
    )
    export_json.write_text(
        json.dumps(chat_data, indent=1, ensure_ascii=False),
        encoding="utf-8",
    )


async def export(client: telethon.TelegramClient, chat: EntityLike) -> None:
    """Export data from a Telegram chat.

    The export is written to ``<export.path>/<entity id>/export.json``. If
    that file already exists, its contents are loaded and the ID of the last
    saved message is passed as ``offset_id`` so only further messages are
    requested; otherwise a fresh export is started.

    Settings are read from the module-level ``config`` mapping (keys
    ``export.path``, ``export.max_file_size`` and ``export.batch_size``).

    Parameters
    ----------
    client : telethon.TelegramClient
        The Telegram client.
    chat : EntityLike
        The chat to export.
    """
    entity = await client.get_entity(chat)
    path = Path(f"{config['export']['path']}/{entity.id}")
    export_json = path / "export.json"

    if export_json.exists():
        # Read with the same encoding the file is written with (UTF-8), and
        # close the handle deterministically instead of leaking it.
        with export_json.open(encoding="utf-8") as export_file:
            chat_data = json.load(export_file)
        saved_messages = chat_data["messages"]
        last_message = saved_messages[-1]["id"] if saved_messages else 0
    else:
        export_json.parent.mkdir(exist_ok=True, parents=True)
        # NOTE(review): assumes the entity is a user (has `first_name`);
        # the chat type is hard-coded accordingly.
        chat_data = {
            "name": entity.first_name,
            "type": "personal_chat",
            "id": entity.id,
            "messages": [],
        }
        last_message = 0

    # Close the takeout session if one is already open. If it's not open,
    # `client.end_takeout` will raise a TypeError, so it's suppressed.
    with suppress(TypeError):
        await client.end_takeout(success=False)

    async with client.takeout(
        contacts=True,
        users=True,
        files=True,
        max_file_size=config["export"]["max_file_size"],
    ) as takeout:
        messages = takeout.iter_messages(
            chat,
            reverse=True,
            offset_id=last_message,
        )
        batch_size = config["export"]["batch_size"]
        batch = []
        async for message in messages:
            batch.append(message)
            if len(batch) >= batch_size:
                await _flush_batch(batch, chat_data, path, export_json)
                batch = []
        # Flush whatever is left over after the final full batch.
        if batch:
            await _flush_batch(batch, chat_data, path, export_json)
async def __main(client: telethon.TelegramClient) -> None:
    """Configure logging, then export every chat listed in the configuration.

    The log level is taken from ``config["ream"]["log_level"]`` when that key
    is present and names a standard ``logging`` level; otherwise ``INFO`` is
    used. Chats are read from ``config["export"]["chats"]`` and exported one
    after another.

    Parameters
    ----------
    client : telethon.TelegramClient
        The Telegram client.
    """
    # Standard level names understood by the `logging` module. The lowest
    # level is spelled "NOTSET" (not "NOTESET").
    valid_levels = {
        "NOTSET",
        "DEBUG",
        "INFO",
        "WARNING",
        "ERROR",
        "CRITICAL",
    }
    if (
        "ream" in config
        and "log_level" in config["ream"]
        and config["ream"]["log_level"] in valid_levels
    ):
        logging.basicConfig(level=config["ream"]["log_level"])
    else:
        logging.basicConfig(level=logging.INFO)

    # The result is discarded.
    # NOTE(review): presumably this primes the client's entity cache so the
    # configured chats can be resolved - confirm against Telethon docs.
    await client.get_dialogs()

    for chat in config["export"]["chats"]:
        log.info("Exporting chat %s...", chat)
        await export(client, chat)
if __name__ == "__main__":
    # All settings come from ream.toml in the current working directory.
    # `config` is a module-level global read by the functions in this file.
    config_path = Path("ream.toml")
    with config_path.open("rb") as config_file:
        config = tomllib.load(config_file)

    api_settings = config["api"]
    client = telethon.TelegramClient(
        "ream",
        api_settings["app_id"],
        api_settings["app_hash"],
        app_version="1.0.0",
    )

    # Drive the async entry point to completion on the client's event loop
    # while the client context is active.
    with client:
        client.loop.run_until_complete(__main(client))