Skip to content

Commit 81378d8

Browse files
authored
Merge pull request #266 from Germano0/eos_accounting_cmsvoc_script
adds cmsvoc accounting scripts
2 parents 24815ca + 8bf0fdf commit 81378d8

File tree

2 files changed

+412
-0
lines changed

2 files changed

+412
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,299 @@
1+
# Germano Massullo - [email protected]
2+
# Dario Mapelli - [email protected]
3+
4+
# This script is running in acrontab in lxplus under the personal account (not 'cmsvoc' service account) of the CMS VOC, because such account has full permissions. This python script cannot be run directly on lxplus, the acrontab must run it from the following bash script
5+
"""
6+
#!/bin/bash
7+
8+
# This script is run by acrontab on lxplus
9+
source /etc/profile
10+
python3 eos_accounting_cmsvoc.py
11+
"""
12+
# A very extensive explanation of this script is available at
13+
# https://its.cern.ch/jira/browse/CMSMONIT-521?focusedId=4724501&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-4724501
14+
# Its text cannot be pasted here for both reserved comments and lack of layout capabilities in a Python file compared to a comment in a Jira ticket.
15+
# A copy of this script is hosted for archival purposes at https://github.com/dmwm/CMSMonitoring
16+
17+
import os
18+
import json
19+
import logging
20+
import tempfile
21+
22+
23+
EXCLUDED_PATHS = ["/eos/cms/store/cmst3", "/eos/recovered", "/eos/totem"]
24+
25+
# Many numbers come from the EOS tools in the form of strings. We need to convert them
26+
# to numbers (integer or float)
27+
# to be better handled by the CMS monitoring web page
28+
def convert_to_terabytes_and_cast_from_string_to_number__non_ec(dictionary_list):
    """Cast the string fields of non-EC quota entries to numbers, in place.

    Byte counters are divided by 10**12 and stored under "terabytes" key
    names; the original "bytes" keys are removed. File counters are cast to
    int and the used-space percentage to float.

    Args:
        dictionary_list: list of dicts, each holding the string-valued keys
            'usedbytes', 'usedlogicalbytes', 'maxbytes', 'maxlogicalbytes',
            'percentageusedbytes', 'usedfiles' and 'maxfiles'. Any other
            key is left untouched.

    Returns:
        The same list object, with every dict modified in place.

    Raises:
        KeyError: if an entry lacks one of the keys listed above.
        ValueError: if a value cannot be parsed as a number.
    """
    for item in dictionary_list:
        item['usedbytes'] = float(item['usedbytes']) / (10**12)
        item['usedlogicalbytes'] = float(item['usedlogicalbytes']) / (10**12)
        item['maxbytes'] = float(item['maxbytes']) / (10**12)
        item['maxlogicalbytes'] = float(item['maxlogicalbytes']) / (10**12)
        try:
            item['used_logical_over_used_raw_percentage'] = item['usedlogicalbytes'] / item['usedbytes'] * 100
        except ZeroDivisionError:
            # Nothing is stored under this node: the ratio is undefined.
            item['used_logical_over_used_raw_percentage'] = None
        # Rename the converted values so that the key states the actual unit.
        item['usedterabytes'] = item.pop('usedbytes')
        item['usedlogicalterabytes'] = item.pop('usedlogicalbytes')
        item['maxterabytes'] = item.pop('maxbytes')
        item['maxlogicalterabytes'] = item.pop('maxlogicalbytes')
        item['percentageusedbytes'] = float(item['percentageusedbytes'])
        item['percentageusedterabytes'] = item.pop('percentageusedbytes')
        item['usedfiles'] = int(item['usedfiles'])
        item['maxfiles'] = int(item['maxfiles'])
    return dictionary_list
47+
48+
# Many numbers come from the EOS tools in the form of strings. We need to convert them
49+
# to numbers (integer or float)
50+
# to be better handled by the CMS monitoring web page
51+
# =====
52+
# the reason why following key names do not contain the word "terabytes"
53+
# instead of "bytes" is that the frontend would break, because it expects
54+
# the key names that are currently present in this function
55+
def convert_to_terabytes_and_cast_from_string_to_number__ec(dictionary_list):
    """Cast the string fields of EC quota dump entries to numbers, in place.

    Every byte counter is divided by 10**12. The used-space counters are
    then renamed from "..._bytes" to "..._terabytes"; the quota and "free"
    keys keep their original names.

    Args:
        dictionary_list: list of dicts, each holding the string-valued keys
            'max_logical_quota', 'total_used_logical_bytes',
            'logical_rep_bytes', 'logical_ec_bytes',
            'total_used_physical_bytes', 'physical_rep_bytes',
            'physical_ec_bytes', 'free_physical', 'free_physical_for_ec',
            'free_physical_for_rep' and 'free_logical'. Any other key is
            left untouched.

    Returns:
        The same list object, with every dict modified in place.

    Raises:
        KeyError: if an entry lacks one of the keys listed above.
        ValueError: if a value cannot be parsed as a number.
    """
    used_space_keys = (
        'total_used_logical_bytes',
        'logical_rep_bytes',
        'logical_ec_bytes',
        'total_used_physical_bytes',
        'physical_rep_bytes',
        'physical_ec_bytes',
    )
    free_space_keys = (
        'free_physical',
        'free_physical_for_ec',
        'free_physical_for_rep',
        'free_logical',
    )
    for item in dictionary_list:
        item['max_logical_quota'] = float(item['max_logical_quota']) / (10**12)
        # The physical quota is not read from the input, it is derived from
        # the logical one. NOTE(review): 12/10 is presumably the erasure
        # coding overhead factor - confirm.
        item['max_physical_quota'] = item['max_logical_quota'] * 12 / 10
        for key in used_space_keys + free_space_keys:
            item[key] = float(item[key]) / (10**12)
        try:
            item['used_logical_over_used_raw_percentage'] = item['total_used_logical_bytes'] / item['total_used_physical_bytes'] * 100
        except ZeroDivisionError:
            # Nothing is stored under this node: the ratio is undefined.
            item['used_logical_over_used_raw_percentage'] = None
        # Rename the converted counters so that the key states the actual unit.
        for key in used_space_keys:
            item[key[:-len('bytes')] + 'terabytes'] = item.pop(key)
    return dictionary_list
80+
81+
82+
def get_eos_ec_quota_dump():
    """Parse the EC quota dump file into a list of dictionaries.

    The file /eos/cms/store/accounting/cms_quota_dump.txt is read line by
    line; every line is expected to be a whitespace-separated sequence of
    key=value tokens and becomes one dict with string keys and values.
    Blank lines are ignored.

    Returns:
        A list with one dict per non-blank line of the dump file.

    Raises:
        OSError: if the dump file cannot be opened or read (the failure is
            logged before being re-raised).
        ValueError: if a token does not contain '='.
    """
    dump_path = "/eos/cms/store/accounting/cms_quota_dump.txt"
    try:
        with open(dump_path, "r") as dump_file:
            lines = dump_file.readlines()
    except OSError:
        # Without the dump there is nothing to parse: log and let the caller
        # see the real I/O error.
        logging.exception('Cannot read the EOS EC quota dump %s', dump_path)
        raise

    dictionary_list = []
    for line in lines:
        tokens = line.split()
        if not tokens:
            continue
        # maxsplit=1 keeps any '=' that is part of the value.
        dictionary_list.append(dict(token.split('=', 1) for token in tokens))
    return dictionary_list
99+
100+
101+
def get_eos_quota_ls_output():
    """Run "eos quota ls -m" and return its ALL/project quota entries.

    The command output is redirected to a temporary file and parsed line by
    line; every line is expected to be a whitespace-separated sequence of
    key=value tokens. Only entries whose 'gid' is "ALL" or "project" are
    kept, and their 'space' key is renamed to 'path'.

    A non-zero exit status of the command is logged, and whatever output
    was produced is still parsed.

    Returns:
        A list of dicts with string keys and values, one per kept entry.

    Raises:
        ValueError: if a token does not contain '='.
        KeyError: if a kept entry has no 'space' key.
    """
    with tempfile.NamedTemporaryFile() as accounting_file:
        # export EOSHOME="" is needed to avoid getting the following two messages everytime the command is run
        # =====
        # pre-configuring default route to /eos/user/c/cmsvoc/
        # -use $EOSHOME variable to override
        # =====
        # os.system() does not raise when the command fails, so the exit
        # status has to be checked explicitly.
        exit_status = os.system('export EOSHOME="" && eos -r 103074 1399 quota ls -m > %s' % accounting_file.name)
        if exit_status != 0:
            logging.error('Cannot get the eos quota ls output from EOS (exit status %s)', exit_status)

        with open(accounting_file.name) as file:
            lines = file.readlines()

    parsed_entries = []
    for line in lines:
        tokens = line.split()
        if not tokens:
            continue
        # maxsplit=1 keeps any '=' that is part of the value.
        parsed_entries.append(dict(token.split('=', 1) for token in tokens))

    # In "eos quota ls" an entry has either gid=ALL or gid=project, never
    # both. Concerning this, Jaroslav Guenther said:
    # "That is expected, it is a special quota type which does not
    # allow any other quota node to be defined on the same path. Project
    # quota books all volume/inode usage under the project subtree to a single
    # project account (gid 99). E.g. the recycle bin uses this quota type."
    dictionary_list = [
        entry for entry in parsed_entries
        if entry.get("gid") in ("ALL", "project")
    ]

    # "eos quota ls" uses 'space' instead of 'path' as attribute name for
    # folders. Renaming it to 'path' makes it easier to compare this output
    # with the other data sources.
    for entry in dictionary_list:
        entry['path'] = entry.pop('space')
    return dictionary_list
144+
145+
146+
def get_xrdcp_output():
    """Fetch the EOS accounting JSON with xrdcp and return its storage shares.

    root://eoscms.cern.ch//eos/cms/proc/accounting is copied to a temporary
    file, which is then parsed as JSON.

    A non-zero exit status of the command is logged, and whatever output
    was produced is still parsed.

    Returns:
        The list found under ['storageservice']['storageshares'], where the
        'path' value of every element has been replaced by its first item.

    Raises:
        json.JSONDecodeError: if the command output is not valid JSON
            (e.g. it is empty because the command failed).
        KeyError: if the expected keys are missing from the JSON document.
    """
    with tempfile.NamedTemporaryFile() as accounting_file:
        # NOTE: this command replaced the earlier
        # 'export EOSHOME="" && xrdcp root://eoscms.cern.ch//eos/cms/proc/accounting - > %s'
        # os.system() does not raise when the command fails, so the exit
        # status has to be checked explicitly.
        exit_status = os.system('XRD_CPUSEPGWRTRD=0 xrdcp --nopbar root://eoscms.cern.ch//eos/cms/proc/accounting - > %s' % accounting_file.name)
        if exit_status != 0:
            logging.error('Cannot get the xrdcp output from EOS (exit status %s)', exit_status)

        with open(accounting_file.name) as json_file:
            json_data_temp = json.load(json_file)

    data = json_data_temp['storageservice']['storageshares']
    # Due to how EOS returns the JSON output, the 'path' value of each
    # element is a one-item list (["foo"]) instead of a plain string ("foo").
    # That adds useless complexity, so the list is replaced by its first item.
    for item in data:
        item['path'] = item['path'][0]
    return data
176+
177+
178+
def match_xrdcp_and_eos_quota_ls_entries():
    """Print how the paths reported by the three EOS data sources overlap.

    Diagnostic helper: it prints the paths known to "eos quota ls" but not
    to xrdcp, the number of distinct paths of each source, and the number
    of paths shared by xrdcp and the EC quota dump. Nothing is returned.
    """
    xrdcp_entries = get_xrdcp_output()
    quota_ls_entries = get_eos_quota_ls_output()
    ec_quota_entries = get_eos_ec_quota_dump()

    xrdcp_paths = {entry['path'] for entry in xrdcp_entries}
    quota_ls_paths = {entry['path'] for entry in quota_ls_entries}
    ec_quota_paths = {entry['quota_node'] for entry in ec_quota_entries}

    print(quota_ls_paths.difference(xrdcp_paths))
    print("eos_quota_ls_paths_set length is", len(quota_ls_paths))
    print("xrdcp_paths_set length is", len(xrdcp_paths))
    print("eos_ec_quota_paths_set length is", len(ec_quota_paths))
    print("xrdcp & eos_ec_quota_paths_set length is", len(xrdcp_paths.intersection(ec_quota_paths)))
196+
197+
198+
def get_non_ec_statistics():
    """Return the converted "eos quota ls" entries of the non-EC paths.

    A path is treated as non-EC when xrdcp reports it and it is not a
    quota node of the EC quota dump. The matching "eos quota ls" entries
    are passed through
    convert_to_terabytes_and_cast_from_string_to_number__non_ec.

    Returns:
        The list of converted entries, one dict per matching path.
    """
    xrdcp_data = get_xrdcp_output()
    eos_quota_ls_data = get_eos_quota_ls_output()
    eos_ec_quota = get_eos_ec_quota_dump()

    xrdcp_paths_set = {entry['path'] for entry in xrdcp_data}
    eos_ec_quota_paths_set = {entry['quota_node'] for entry in eos_ec_quota}
    paths = xrdcp_paths_set - eos_ec_quota_paths_set

    results = [entry for entry in eos_quota_ls_data if entry['path'] in paths]
    return convert_to_terabytes_and_cast_from_string_to_number__non_ec(results)
214+
215+
216+
def get_ec_statistics():
    """Return the EC quota dump entries with their values cast to terabyte numbers."""
    return convert_to_terabytes_and_cast_from_string_to_number__ec(
        get_eos_ec_quota_dump()
    )
219+
220+
# this function uses the nomenclature of non EC JSON
221+
def produce_summary():
    """Merge the EC and non-EC statistics into one list with a TOTAL entry.

    Entries whose path starts with one of EXCLUDED_PATHS are dropped. The
    remaining entries of both sources are reduced to the same key names
    (those of the non-EC JSON): 'path', 'usedterabytes',
    'usedlogicalterabytes', 'maxlogicalterabytes', 'maxphysicalterabytes',
    'used_logical_over_used_raw_percentage' and
    'used_logical_space_percentage'. Their space counters are summed into a
    final entry whose 'path' is 'TOTAL'.

    The filtered lists are built as new lists, not by deleting from the
    lists being iterated: deleting during iteration skips the element that
    follows each deleted one, leaving it unconverted and out of the total.

    Returns:
        A list of dicts: non-EC entries, then EC entries, then the TOTAL.
    """
    ec_statistics = get_ec_statistics()
    non_ec_statistics = get_non_ec_statistics()
    total = {'path' : 'TOTAL', 'usedterabytes' : 0.0, 'usedlogicalterabytes' : 0.0, 'maxlogicalterabytes' : 0.0, 'maxphysicalterabytes' : 0.0, 'used_logical_over_used_raw_percentage' : 0.0, 'used_logical_space_percentage' : 0.0}
    summed_keys = ('usedterabytes', 'usedlogicalterabytes', 'maxlogicalterabytes', 'maxphysicalterabytes')
    excluded_prefixes = tuple(EXCLUDED_PATHS)

    non_ec_summary = []
    for item in non_ec_statistics:
        if item['path'].startswith(excluded_prefixes):
            continue
        # Drop the attributes that have no counterpart in the EC entries.
        for key in ('gid', 'maxfiles', 'quota', 'usedfiles', 'statusbytes', 'statusfiles'):
            del item[key]
        item['used_logical_space_percentage'] = item.pop('percentageusedterabytes')
        item['maxphysicalterabytes'] = item.pop('maxterabytes')
        for key in summed_keys:
            total[key] += item[key]
        non_ec_summary.append(item)

    ec_summary = []
    for item in ec_statistics:
        if item['quota_node'].startswith(excluded_prefixes):
            continue
        item['path'] = item.pop('quota_node')
        item['maxlogicalterabytes'] = item.pop('max_logical_quota')
        item['maxphysicalterabytes'] = item.pop('max_physical_quota')
        item['usedlogicalterabytes'] = item.pop('total_used_logical_terabytes')
        del item['logical_rep_terabytes']
        del item['logical_ec_terabytes']
        item['usedterabytes'] = item.pop('total_used_physical_terabytes')
        # Drop the attributes that have no counterpart in the non-EC entries.
        for key in ('physical_rep_terabytes', 'physical_ec_terabytes', 'free_physical',
                    'free_physical_for_ec', 'free_physical_for_rep', 'free_logical'):
            del item[key]
        try:
            item['used_logical_space_percentage'] = item['usedlogicalterabytes'] * 100 / item['maxlogicalterabytes']
        except ZeroDivisionError:
            # No logical quota is set on this node: the percentage is undefined.
            item['used_logical_space_percentage'] = None
        for key in summed_keys:
            total[key] += item[key]
        ec_summary.append(item)

    try:
        total['used_logical_over_used_raw_percentage'] = total['usedlogicalterabytes'] / total['usedterabytes'] * 100
    except ZeroDivisionError:
        total['used_logical_over_used_raw_percentage'] = None
    try:
        total['used_logical_space_percentage'] = total['usedlogicalterabytes'] * 100 / total['maxlogicalterabytes']
    except ZeroDivisionError:
        total['used_logical_space_percentage'] = None

    # the [] brackets around total are needed to convert it into a list,
    # otherwise the addition operator will not work among a list and a dictionary
    return non_ec_summary + ec_summary + [total]
291+
292+
# Regenerate the three accounting JSON files. Each output file is opened (and
# therefore truncated) before the function producing its content is called.
for output_path, producer in (
    ('/eos/cms/store/accounting/eos_ec_accounting.json', get_ec_statistics),
    ('/eos/cms/store/accounting/eos_non_ec_accounting.json', get_non_ec_statistics),
    ('/eos/cms/store/accounting/eos_accounting_summary.json', produce_summary),
):
    with open(output_path, 'w') as json_output:
        json.dump(producer(), json_output, indent=4)

0 commit comments

Comments
 (0)