Skip to content
Snippets Groups Projects
Commit 32941fc9 authored by Christoph Heidecker's avatar Christoph Heidecker
Browse files

* Replaced data rate and total file size calculation by a more performant one

parent a8ef5b14
Branches
No related tags found
No related merge requests found
......@@ -24,9 +24,14 @@ class EvaluateMonitoring:
self.save_plots = True
try:
self.get_file_size()
self.get_data_rate()
except Exception as e:
print("Python-Error: " + str(e) + "\ncould not get size of files -> abort")
print("Python-Error: " + str(e) + "\ncould not get data rate of jobs -> abort")
exit()
try:
self.get_total_file_size()
except Exception as e:
print("Python-Error: " + str(e) + "\ncould not get total file size requested by jobs -> abort")
exit()
try:
self.get_cached_percentage_on_worker_node()
......@@ -90,24 +95,20 @@ class EvaluateMonitoring:
+ str(column))
pass
def get_file_size(self):
    """Fill in the requested file size and the data rate for every row of ``self.df``.

    For each row the ``input_files`` cell is parsed as a list of strings.
    Every entry is split at its last ``:`` and the part after it is read as
    an integer size; the sizes are summed and divided by ``1024 * 1024``.
    The result is written to both the ``size_of_all_requested_files`` and
    the ``total_file_size`` columns.

    ``data_rate`` is ``NetworkInputMb / Runtime`` when both values are
    greater than zero and NaN otherwise.

    Raises:
        ValueError, SyntaxError: if an ``input_files`` cell is not a valid
            Python literal or a size is not an integer.
    """
    from ast import literal_eval

    size_divisor = 1024 * 1024
    for index, row in self.df.iterrows():
        # NOTE(review): this cell used to be parsed with eval(), which would
        # execute arbitrary code stored in the data. literal_eval accepts the
        # same list-of-strings literal without that risk.
        input_files = literal_eval(str(row['input_files']))
        network_input = float(row['NetworkInputMb'])
        runtime = float(row['Runtime'])

        summed_sizes = sum(int(entry.rsplit(":", 1)[1]) for entry in input_files)
        total_file_size_for_row = float(summed_sizes) / size_divisor

        self.df.at[index, 'size_of_all_requested_files'] = total_file_size_for_row
        # The rate is based on the HTCondor measured network input value,
        # not on the total size of the requested files.
        self.df.at[index, 'data_rate'] = (
            network_input / runtime if runtime > 0 and network_input > 0 else np.nan
        )
        self.df.at[index, 'total_file_size'] = total_file_size_for_row
def get_data_rate(self):
    """Store the data rate of every job in the ``data_rate`` column of ``self.df``.

    The rate is ``NetworkInputMb / Runtime`` for rows whose ``Runtime`` is
    greater than zero. Rows with any other ``Runtime`` are left out of the
    numerator, so the index-aligned division yields NaN for them.

    If the calculation raises, the error is printed and the process is
    terminated through ``SystemExit(-1)``.
    """
    import sys

    try:
        runtime = self.df['Runtime']
        # One .loc call selects rows and column together, instead of first
        # copying every column of the filtered frame and then picking one.
        network_input = self.df.loc[runtime > 0.0, 'NetworkInputMb']
        # NOTE(review): rows with NetworkInputMb <= 0 yield a rate of 0 (or a
        # negative value) here, not NaN - confirm that this is intended.
        self.df['data_rate'] = network_input.divide(runtime)
    except Exception as e:
        print("Python-Error: " + str(e) + "\nFailed to calculate data rate of jobs!")
        # sys.exit works even when the site module's exit() helper is absent.
        sys.exit(-1)
def get_total_file_size(self):
    """Store the summed size of each job's input files in ``total_file_size``.

    Every ``input_files`` cell is expected to describe a list of strings.
    Each entry is split at its last ``:``; the part after it is read as a
    number, divided by ``1024 * 1024`` and added to the row's total.

    A cell may hold either the string representation of the list or the
    list itself.

    If the calculation raises, the error is printed and the process is
    terminated through ``SystemExit(-1)``.
    """
    import ast
    import sys

    def _summed_size(file_list):
        # Return the summed, scaled sizes of one input_files cell.
        # NOTE(review): the cell used to be parsed with eval(), which would
        # execute arbitrary code stored in the data. literal_eval accepts the
        # same list literal without that risk.
        entries = ast.literal_eval(file_list) if isinstance(file_list, str) else file_list
        return sum(float(entry.rsplit(":", 1)[1]) / (1024 * 1024) for entry in entries)

    try:
        self.df['total_file_size'] = self.df['input_files'].apply(_summed_size)
    except Exception as e:
        print("Python-Error: " + str(e) + "\nFailed to calculate total file size requested by jobs!")
        # sys.exit works even when the site module's exit() helper is absent.
        sys.exit(-1)
def get_cached_percentage_on_worker_node(self):
self.df["cached_percentage_on_worker_node"] = 0.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment