-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathopenaire.py
85 lines (68 loc) · 2.2 KB
/
openaire.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Radboud University's records are a mess because they don't separate out their UMC,
# so we try to clean them up using OpenAIRE.
import json
import os
import time

import pandas as pd
import requests

import attention_list as api_targets
# Shared module state; the functions in this file read and rebind these
# names through `global` declarations.
dothis = True        # loop flag: the harvest loop keeps requesting pages while True
page = 1             # page number sent with the next API request
access_token = ""    # bearer token; filled in by make_access_token()
df = pd.DataFrame()  # accumulates the result rows that are written to CSV at the end
def make_access_token():
    """Fetch an access token and store it in the global ``access_token``.

    Sends a GET request to the URL formed by concatenating
    ``api_targets.OPENAIRE_REFRESHER`` and ``api_targets.OPENAIRE_RT`` and
    saves the ``access_token`` field of the JSON reply in the module-level
    variable of the same name.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no answer arrives within the timeout.
        KeyError: if the JSON reply has no ``access_token`` field.
    """
    global access_token
    url = api_targets.OPENAIRE_REFRESHER + api_targets.OPENAIRE_RT
    # NOTE(review): OPENAIRE_RT is presumably a refresh token, in which case
    # this print writes a credential to stdout -- confirm that is acceptable.
    print(url, "\n")
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(
        url,
        headers={"accept": "application/json"},
        timeout=60,
    )
    # Fail with a clear HTTP error instead of a KeyError on an error payload.
    response.raise_for_status()
    access_token = response.json()['access_token']
    # NOTE(review): this prints the bearer token itself to stdout.
    print(access_token)
def api_call(my_year):
    """Request one page of publications for ``my_year`` and return the reply.

    The query goes to ``api_targets.OPENAIRE_ENDPOINT`` and uses the current
    values of the module-level ``page`` and ``access_token``. The decoded
    JSON reply is pretty-printed to stdout before it is returned.

    Args:
        my_year: Publication year; used to build the date range
            ``<year>-01-01`` .. ``<year>-12-31``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if no answer arrives within the timeout.
    """
    # An earlier variant of this query filtered on "relOrganizationId"
    # instead of "relCollectedFromDatasourceId".
    params = {
        "search": "OpenAIRE Graph",
        "type": "publication",
        "fromPublicationDate": f"{my_year}-01-01",
        "toPublicationDate": f"{my_year}-12-31",
        # NOTE(review): this value already looks percent-encoded ("%253A"),
        # and requests encodes query parameters again when building the URL.
        # Confirm the server really receives the intended datasource id.
        "relCollectedFromDatasourceId": "opendoar____%253A%253A7bccfde7714a1ebadf06c5f4cea752c1",
        "pageSize": "100",
        "page": str(page),
    }
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer " + access_token,
    }
    url = api_targets.OPENAIRE_ENDPOINT
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, params=params, headers=headers, timeout=60)
    # Surface HTTP errors (e.g. a rejected token) here rather than as a
    # KeyError later, when the caller looks for the 'header' field.
    response.raise_for_status()
    my_data = response.json()
    print(json.dumps(my_data, indent=4, sort_keys=True))
    return my_data
def unpack(my_data):
    """Update the paging state from one API reply and collect its results.

    Reads ``page``, ``pageSize`` and ``numFound`` from ``my_data['header']``,
    then either clears the global ``dothis`` flag (no more pages) or advances
    the global ``page`` by one. Any entries under ``my_data['results']`` are
    appended as rows to the global DataFrame ``df``. Finally prints the
    current value of ``dothis``.

    Args:
        my_data: Decoded JSON reply (a dict) with a ``header`` mapping and,
            optionally, a ``results`` list.

    Raises:
        KeyError: if ``header`` or one of its three fields is missing.
        ZeroDivisionError: if the reported ``pageSize`` is 0.
    """
    global dothis, page, df
    header = my_data['header']
    page = int(header['page'])
    size = int(header['pageSize'])
    total = int(header['numFound'])
    # Stop once the page number exceeds the number of full pages. When
    # numFound is an exact multiple of pageSize this allows one extra
    # (empty) page to be requested before the loop ends.
    if page > total // size:
        dothis = False
    else:
        page += 1
    # my_data is a dict, so the results must be read by key, not attribute.
    results = my_data.get('results')
    if results:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add the new rows.
        df = pd.concat([df, pd.DataFrame(results)], ignore_index=True)
    print(dothis)
# Harvest all pages for each publication year, then write everything to CSV.
make_access_token()
for year in range(2016, 2024):
    # Restart paging for every year. unpack() leaves `page` at the last page
    # of the previous year, which would make the next year's query skip its
    # earlier pages.
    page = 1
    dothis = True
    while dothis:
        data = api_call(year)
        unpack(data)
        # Pause 5 seconds between successive requests.
        time.sleep(5)
# Create the target directory if needed so the harvest is not lost to a
# missing-directory error at the very end.
os.makedirs("./openaire_output", exist_ok=True)
df.to_csv("./openaire_output/radboud.csv", index=False)