Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
monitor-NaviX
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ETP-HTC
monitor-NaviX
Commits
cf4acdf8
Commit
cf4acdf8
authored
6 years ago
by
Christoph Heidecker
Browse files
Options
Downloads
Patches
Plain Diff
* Added automatic download of input data files from remote
parent
f7618c0d
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
NaviXMon.py
+2
-1
2 additions, 1 deletion
NaviXMon.py
navimon/inputData.py
+113
-35
113 additions, 35 deletions
navimon/inputData.py
navimon/predictionScenario.py
+13
-1
13 additions, 1 deletion
navimon/predictionScenario.py
with
128 additions
and
37 deletions
NaviXMon.py
+
2
−
1
View file @
cf4acdf8
...
...
@@ -37,7 +37,8 @@ def __init__():
export_as
=
'
.pdf
'
# export_as = '.png'
workflow
=
'
copy60sg
'
workflow
=
'
test
'
# workflow = 'copy60sg'
# workflow = 'copy_topas'
# workflow = 'copy_nemo'
# workflow = 'copy_tsy_v1'
...
...
This diff is collapsed.
Click to expand it.
navimon/inputData.py
+
113
−
35
View file @
cf4acdf8
from
.commonFunctions
import
print_with_color
import
os
from
urllib.request
import
urlretrieve
from
urllib.parse
import
urlparse
class
InputData
:
...
...
@@ -6,8 +9,11 @@ class InputData:
def
__init__
(
self
,
workflow
):
self
.
navix_monitor_file_list
=
[
r
'
NaviX.mon
'
]
# log files to be loaded
# Load corresponding workflow defined in NaviMon.py
try
:
if
workflow
==
'
copy60sg
'
:
if
workflow
==
'
test
'
:
self
.
test
()
elif
workflow
==
'
copy60sg
'
:
self
.
copy60sg
()
elif
workflow
==
'
copy_topas
'
:
self
.
copy_topas
()
...
...
@@ -33,30 +39,74 @@ class InputData:
color
=
'
red
'
)
exit
(
-
1
)
# Download data if remote URL (e.g. http://...) was added to file list
try
:
for
index
,
input_file
in
enumerate
(
self
.
navix_monitor_file_list
):
print
(
'
Loading input data file list:
'
)
if
bool
(
urlparse
(
input_file
).
netloc
):
tmp_directory
=
'
.tmp_data/
'
+
str
(
workflow
)
tmp_file
=
tmp_directory
+
'
/
'
+
str
(
os
.
path
.
basename
(
input_file
))
# Check if tmp file already exists:
if
os
.
path
.
exists
(
tmp_file
):
print_with_color
(
'
Input data file
"'
+
str
(
input_file
)
+
'"
already downloaded to
'
+
str
(
tmp_file
),
color
=
'
grey
'
)
print_with_color
(
'
Using cached version of input data file!
'
,
color
=
'
yellow
'
)
else
:
print_with_color
(
'
Beginning file download for input data file
"'
+
str
(
input_file
)
+
'"
...
'
,
color
=
'
grey
'
)
if
not
os
.
path
.
exists
(
tmp_directory
):
try
:
os
.
makedirs
(
tmp_directory
)
except
Exception
as
e
:
print_with_color
(
'
Could bn
'
,
color
=
'
red
'
)
urlretrieve
(
input_file
,
tmp_file
)
self
.
navix_monitor_file_list
[
index
]
=
tmp_file
else
:
pass
except
Exception
as
e
:
print_with_color
(
"
Python-Error:
"
+
str
(
e
)
+
"
\n
Failed to get input data from remote URLs!
"
,
color
=
'
red
'
)
exit
(
-
1
)
def
test
(
self
):
# File for test scenario
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/test.mon
'
]
def
copy60sg
(
self
):
# Copy tests with tuned SSD Raid0:
# -------------------------------------
# -> run 2: all files were already cached (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/02-tuned-SSDs-Raid0/NaviX.mon.180815']
# self.navix_monitor_file_list = [r'data/copy/sg/02-tuned-SSDs-Raid0/NaviX.mon.180815']
# -> run 3: bug in update hook leads to percentage shift (60 jobs, 5 files/job),
# shift was corrected in data (60 jobs, 5 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-0%-corr',
# r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-10%-90%-corr',
# r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-100%-corr']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-0%-corr',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-10%-90%-corr',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-100%-corr']
# -> run 4: multiple runs for more statistics (2 runs, 60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180818.newlog',
# r'data/copy/ekpsg/04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180819.newlog']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180818.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180819.newlog']
# -> run 5: new monitoring log maybe with bug in update hook (60 jobs, 5 or 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/05-tuned-SSDs-Raid0-new-log/NaviX.mon.debug.copy.small.180823']
self
.
navix_monitor_file_list
=
[
r
'
data/copy/ekpsg/05-tuned-SSDs-Raid0-new-log/NaviX.mon.Copy.180824
'
]
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'05-tuned-SSDs-Raid0-new-log/NaviX.mon.debug.copy.small.180823']
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/
'
r
'
05-tuned-SSDs-Raid0-new-log/NaviX.mon.Copy.180824
'
]
def
copy_topas
(
self
):
# -> run 0:
# Too many jobs for all worker nodes together
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_COPY_v0.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_COPY_v1.mon']
self
.
navix_monitor_file_list
=
[
r
'
data/copy/topas/NaviX_TOPAS_COPY_v0.mon
'
,
r
'
data/copy/topas/NaviX_TOPAS_COPY_v1.mon
'
]
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
# r'NaviX_TOPAS_COPY_v0.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
# r'NaviX_TOPAS_COPY_v1.mon']
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/
'
r
'
NaviX_TOPAS_COPY_v0.mon
'
,
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/
'
r
'
NaviX_TOPAS_COPY_v1.mon
'
]
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_small.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_to_big.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX.mon.copyJob80Hold.20190111']
...
...
@@ -65,61 +115,89 @@ class InputData:
# -> run 1:
# Cleaned monitoring log of this run, since there were additional jobs in the monitoring log.
# Maybe this caused the weird behavior of the benchmarks.
# self.navix_monitor_file_list = [r'data/copy/NEMO/NaviX.run1.cleaned.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/NEMO/
# NaviX.run1.cleaned.mon']
# -> run 2:
self
.
navix_monitor_file_list
=
[
r
'
data/copy/nemo/NaviX.run2.mon
'
]
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/nemo/
'
r
'
NaviX.run2.mon
'
]
def
copy_tsy_v1
(
self
):
# -> run 1:
# First test-run for bug-fixing, cache volume was too small
# self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-2.mon.tsy']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
# r'NaviX-2.mon.tsy']
# Successful run with low remote and high cache transfer rate
self
.
navix_monitor_file_list
=
[
r
'
data/copy/tsy/NaviX-3.mon.tsy
'
]
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/
'
r
'
NaviX-3.mon.tsy
'
]
def
copy_tsy_v2
(
self
):
# -> run 2:
# Second run crashed
# self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-4.mon.tsy.crashrun']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
# r'NaviX-4.mon.tsy.crashrun']
# Successful run with high remote and low cache transfer rate
self
.
navix_monitor_file_list
=
[
r
'
data/copy/tsy/NaviX-5.mon.tsy
'
]
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/
'
r
'
NaviX-5.mon.tsy
'
]
def
higgs60sg
(
self
):
# Higgs skimming tests with not-tuned SSD Raid0:
# ----------------------------------------------
# -> run 3:
# bug in update hook leads to percentage shift (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-0%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-10%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-20%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-30%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-40%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-50%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-60%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-70%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-80%.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-0%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-10%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-20%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-30%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-40%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-50%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-60%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-70%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-80%.mon']
# Higgs skimming tests with tuned SSD Raid0:
# ------------------------------------------
# -> run 4:
# bug in update hook leads to percentage shift (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/04-tuned-SSDs-Raid0/NaviX.mon',
# r'data/skimming/ekpsg/04-tuned-SSDs-Raid0/NaviX2.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'04-tuned-SSDs-Raid0/NaviX.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'04-tuned-SSDs-Raid0/NaviX2.mon']
# -> run 5:
# first monitoring file was split since it contained unknown manual tests (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180817.2.newlog',
# r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180818.newlog',
# r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180817.2.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180818.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
# Danger: Some files were cached already. Hence some of the test runs are corrupted.
# First run of the last run period file (successful test run):
self
.
navix_monitor_file_list
=
[
r
'
data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827.run1
'
]
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
self
.
navix_monitor_file_list
=
[
r
'
http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/
'
r
'
05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827.run1
'
]
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
def
jec_sg
(
self
):
# -> run 1:
# TODO: Add run description and upload file
self
.
navix_monitor_file_list
=
[
r
'
data/jec/sg/NaviX.mon
'
]
def
jec_topas
(
self
):
# -> run 1:
# TODO: Add run description and upload file
self
.
navix_monitor_file_list
=
[
r
'
data/jec/topas/NaviX_TOPAS_JEC_v1.mon
'
,
r
'
data/jec/topas/NaviX_TOPAS_JEC_v2.mon
'
]
def
jec_nemo
(
self
):
# -> run 1:
# TODO: Add run description and upload file
self
.
navix_monitor_file_list
=
[
r
'
data/jec/nemo/NaviX.mon
'
]
This diff is collapsed.
Click to expand it.
navimon/predictionScenario.py
+
13
−
1
View file @
cf4acdf8
...
...
@@ -12,7 +12,9 @@ class Prediction:
self
.
workflow_rate_max
=
-
999.
# Maximum read rate the workflow allows per core
try
:
if
workflow
==
'
copy60sg
'
:
if
workflow
==
'
test
'
:
self
.
test
()
elif
workflow
==
'
copy60sg
'
:
self
.
copy60sg
()
elif
workflow
==
'
copy_topas
'
:
self
.
copy_topas
()
...
...
@@ -38,6 +40,16 @@ class Prediction:
color
=
'
red
'
)
exit
(
-
1
)
def
test
(
self
):
# Prediction for test scenario
self
.
data_total
=
1.
# Total amount of data accessed by all jobs
self
.
number_nodes
=
1.
# Number of HTCondor worker nodes connected to cache
self
.
number_slots_per_node
=
1.
# Number of HTCondor slots per worker node
self
.
remote_rate_total
=
1.
# Read rate accessing remote storage in MB/s
self
.
cache_rate_per_node
=
1.
# Cache read speed in MB/s
self
.
workflow_rate_max
=
1.
# Maximum data throughput that the workflow itself provides per core
self
.
workflow_rate_max
=
1.
# Maximum data throughput that the workflow itself provides per core
def
copy60sg
(
self
):
# Copy jobs with 60 jobs on SG/SM machines:
self
.
data_total
=
3600.
*
10.
*
60.
# about 3.6GB per file for /dev/null tests
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment