-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutils.py
65 lines (46 loc) · 1.72 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import logging
import mimetypes
import boto3
import numpy as np
from botocore.exceptions import ClientError
import config
# Module-level boto3 session and S3 client, created once at import time and
# shared by the upload/download helpers in this module.
boto_session = boto3.Session()
s3_client = boto_session.client('s3')
class InvalidS3Bucket(Exception):
    """Exception type for invalid-S3-bucket conditions.

    NOTE(review): nothing in this module raises it; presumably it is raised
    by callers elsewhere -- confirm before removing.
    """
def upload_file(file_name, bucket, object_name=None):
    """Upload a local file to an S3 bucket.

    The object is uploaded with read access granted to the canonical id in
    ``config.BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID`` and full control granted to
    ``config.BRAVE_TODAY_CANONICAL_ID``. Its content type is guessed from
    ``file_name``.

    Args:
        file_name: Path of the local file to upload.
        bucket: Name of the destination bucket.
        object_name: Key to store the object under. Defaults to ``file_name``.

    Returns:
        True if the upload succeeded, False if S3 reported an error (the
        error is logged).
    """
    # Imported locally so this block does not depend on a top-of-file change;
    # boto3 itself is already a module-level import.
    from boto3.exceptions import S3UploadFailedError

    if object_name is None:
        object_name = file_name

    # Fall back to a generic binary type when the extension is not recognised.
    content_type = mimetypes.guess_type(file_name)[0] or 'binary/octet-stream'
    extra_args = {
        'GrantRead': f'id={config.BRAVE_TODAY_CLOUDFRONT_CANONICAL_ID}',
        'GrantFullControl': f'id={config.BRAVE_TODAY_CANONICAL_ID}',
        'ContentType': content_type,
    }

    try:
        s3_client.upload_file(file_name, bucket, object_name, ExtraArgs=extra_args)
    except (ClientError, S3UploadFailedError) as e:
        # The managed transfer re-raises a ClientError from the upload as
        # S3UploadFailedError, which is not a ClientError subclass; catching
        # only ClientError would let real upload failures escape instead of
        # returning False.
        logging.error(e)
        return False
    return True
def download_file(file_name, bucket, object_name=None):
    """Download an S3 object to a local file.

    Args:
        file_name: Local path the object is written to.
        bucket: Name of the bucket to read from.
        object_name: Key of the object to fetch. Defaults to ``file_name``.

    Returns:
        True if the download call completed, False if it raised
        ``ClientError`` (the error is logged).
    """
    source_key = file_name if object_name is None else object_name

    try:
        s3_client.download_file(bucket, source_key, file_name)
    except ClientError as err:
        logging.error(err)
        return False
    else:
        return True
def clean_source_similarity_file(sources_data, sources_sim_data):
    """Remove similarity suggestions that point at unknown publishers.

    For every publisher id found in ``sources_data`` that has an entry in
    ``sources_sim_data``, drop each suggestion whose ``"source"`` is not one
    of the publisher ids in ``sources_data``.

    Args:
        sources_data: Iterable of mappings; the publisher id is read from
            each with ``.get("publisher_id")``.
        sources_sim_data: Dict mapping a publisher id to a list of suggestion
            dicts, each carrying a ``"source"`` key.

    Returns:
        The same ``sources_sim_data`` object, modified in place.
    """
    # A set gives O(1) membership checks for the per-suggestion lookup.
    known_ids = {source.get("publisher_id") for source in sources_data}

    for source_id in known_ids:
        suggestions = sources_sim_data.get(source_id)
        if suggestions is None:
            # No similarity entry for this publisher; nothing to clean.
            continue
        # Filter in one pass instead of popping while iterating: popping by
        # index during enumeration skips the element after each removal.
        # Slice assignment keeps the original list object, so other
        # references to it see the cleaned contents.
        suggestions[:] = [
            suggestion
            for suggestion in suggestions
            if suggestion["source"] in known_ids
        ]

    # NOTE(review): entries keyed by ids that are absent from sources_data
    # are left untouched, as before. If they should be dropped as well, that
    # needs to be added explicitly -- confirm the intended behaviour.
    return sources_sim_data
def get_source_id_for_title(title, sources_df):
    """Return the ``publisher_id`` of the first row whose name equals ``title``.

    Args:
        title: Publisher name to look up in the ``publisher_name`` column.
        sources_df: Table exposing ``publisher_name`` and ``publisher_id``
            columns (presumably a pandas DataFrame -- confirm with callers).

    Returns:
        The ``publisher_id`` value of the first matching row.

    Raises:
        IndexError: If no row matches ``title``.
    """
    name_matches = sources_df.publisher_name == title
    matching_ids = sources_df[name_matches].publisher_id.to_numpy()
    # Indexing an empty result raises IndexError, same as the one-liner form.
    return matching_ids[0]