Skip to content
Snippets Groups Projects
Commit cf4acdf8 authored by Christoph Heidecker's avatar Christoph Heidecker
Browse files

* Added automatic download of input data files from remote

parent f7618c0d
Branches
No related tags found
No related merge requests found
......@@ -37,7 +37,8 @@ def __init__():
export_as = '.pdf'
# export_as = '.png'
workflow = 'copy60sg'
workflow = 'test'
# workflow = 'copy60sg'
# workflow = 'copy_topas'
# workflow = 'copy_nemo'
# workflow = 'copy_tsy_v1'
......
from .commonFunctions import print_with_color
import os
from urllib.request import urlretrieve
from urllib.parse import urlparse
class InputData:
......@@ -6,8 +9,11 @@ class InputData:
def __init__(self, workflow):
self.navix_monitor_file_list = [r'NaviX.mon'] # log files to be loaded
# Load corresponding workflow defined in NaviMon.py
try:
if workflow == 'copy60sg':
if workflow == 'test':
self.test()
elif workflow == 'copy60sg':
self.copy60sg()
elif workflow == 'copy_topas':
self.copy_topas()
......@@ -33,30 +39,74 @@ class InputData:
color='red')
exit(-1)
# Download data if remote URL (e.g. http://...) was added to file list
try:
    # Announce the step once instead of once per list entry.
    print('Loading input data file list:')
    for index, input_file in enumerate(self.navix_monitor_file_list):
        # Entries without a network location are local paths and stay untouched.
        if not urlparse(input_file).netloc:
            continue
        tmp_directory = '.tmp_data/' + str(workflow)
        tmp_file = tmp_directory + '/' + str(os.path.basename(input_file))
        if os.path.exists(tmp_file):
            print_with_color('Input data file "' + str(input_file) + '" already downloaded to ' +
                             str(tmp_file), color='grey')
            print_with_color('Using cached version of input data file!', color='yellow')
        else:
            print_with_color('Beginning file download for input data file "' + str(input_file) + '" ...',
                             color='grey')
            # A failure to create the cache directory is no longer swallowed;
            # it reaches the handler below, which reports the real cause.
            os.makedirs(tmp_directory, exist_ok=True)
            # Download to a temporary name first so that an interrupted
            # transfer is never mistaken for a complete cached file.
            partial_file = tmp_file + '.part'
            urlretrieve(input_file, partial_file)
            os.replace(partial_file, tmp_file)
        # From here on the local copy is used instead of the URL.
        self.navix_monitor_file_list[index] = tmp_file
except Exception as e:
    print_with_color("Python-Error: " + str(e) + "\nFailed to get input data from remote URLs!",
                     color='red')
    exit(-1)
def test(self):
# File for test scenario
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/test.mon']
def copy60sg(self):
# Copy tests with tuned SSD Raid0:
# -------------------------------------
# -> run 2: all files were already cached (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/02-tuned-SSDs-Raid0/NaviX.mon.180815']
# self.navix_monitor_file_list = [r'data/copy/sg/02-tuned-SSDs-Raid0/NaviX.mon.180815']
# -> run 3: bug in update hook leads to percentage shift (60 jobs, 5 files/job),
# shift was corrected in data (60 jobs, 5 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-0%-corr',
# r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-10%-90%-corr',
# r'data/copy/ekpsg/03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-100%-corr']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-0%-corr',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-10%-90%-corr',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'03-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180817-100%-corr']
# -> run 4: multiple runs for more statistics (2 runs, 60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180818.newlog',
# r'data/copy/ekpsg/04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180819.newlog']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180818.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'04-tuned-SSDs-Raid0-bugfix/NaviX.mon.Copy.180819.newlog']
# -> run 5: new monitoring log maybe with bug in update hook( 60 jobs, 5 or 10 files/job)
# self.navix_monitor_file_list = [r'data/copy/ekpsg/05-tuned-SSDs-Raid0-new-log/NaviX.mon.debug.copy.small.180823']
self.navix_monitor_file_list = [r'data/copy/ekpsg/05-tuned-SSDs-Raid0-new-log/NaviX.mon.Copy.180824']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
# r'05-tuned-SSDs-Raid0-new-log/NaviX.mon.debug.copy.small.180823']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/sg/'
r'05-tuned-SSDs-Raid0-new-log/NaviX.mon.Copy.180824']
def copy_topas(self):
# -> run 0:
# Too many jobs for all worker nodes together
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_COPY_v0.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_COPY_v1.mon']
self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_COPY_v0.mon',
r'data/copy/topas/NaviX_TOPAS_COPY_v1.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
# r'NaviX_TOPAS_COPY_v0.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
# r'NaviX_TOPAS_COPY_v1.mon']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
r'NaviX_TOPAS_COPY_v0.mon',
r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/topas/'
r'NaviX_TOPAS_COPY_v1.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_TOPAS_small.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX_to_big.mon']
# self.navix_monitor_file_list = [r'data/copy/topas/NaviX.mon.copyJob80Hold.20190111']
......@@ -65,61 +115,89 @@ class InputData:
# -> run 1:
# Cleaned monitoring log of this run, since there were additional jobs in the monitoring log.
# Maybe this caused the weird behavior of the benchmarks.
# self.navix_monitor_file_list = [r'data/copy/NEMO/NaviX.run1.cleaned.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/NEMO/'
# r'NaviX.run1.cleaned.mon']
# -> run 2:
self.navix_monitor_file_list = [r'data/copy/nemo/NaviX.run2.mon']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/nemo/'
r'NaviX.run2.mon']
def copy_tsy_v1(self):
# -> run 1:
# First test-run for bug-fixing, cache volume was too small
# self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-2.mon.tsy']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
# r'NaviX-2.mon.tsy']
# Successful run with low remote and high cache transfer rate
self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-3.mon.tsy']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
r'NaviX-3.mon.tsy']
def copy_tsy_v2(self):
# -> run 2:
# Second run crashed
# self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-4.mon.tsy.crashrun']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
# r'NaviX-4.mon.tsy.crashrun']
# Successful run with high remote and low cache transfer rate
self.navix_monitor_file_list = [r'data/copy/tsy/NaviX-5.mon.tsy']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/copy/tsy/'
r'NaviX-5.mon.tsy']
def higgs60sg(self):
# Higgs skimming tests with not-tuned SSD Raid0:
# ----------------------------------------------
# -> run 3:
# bug in update hook leads to percentage shift (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-0%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-10%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-20%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-30%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-40%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-50%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-60%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-70%.mon',
# r'data/skimming/ekpsg/03_not-tuned-SSDs-Raid0/NaviX-80%.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-0%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-10%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-20%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-30%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-40%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-50%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-60%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-70%.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'03_not-tuned-SSDs-Raid0/NaviX-80%.mon']
# Higgs skimming tests with tuned SSD Raid0:
# ------------------------------------------
# -> run 4:
# bug in update hook leads to percentage shift (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/04-tuned-SSDs-Raid0/NaviX.mon',
# r'data/skimming/ekpsg/04-tuned-SSDs-Raid0/NaviX2.mon']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'04-tuned-SSDs-Raid0/NaviX.mon',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'04-tuned-SSDs-Raid0/NaviX2.mon']
# -> run 5:
# first monitoring file was split since it contained unknown manual tests (60 jobs, 10 files/job)
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180817.2.newlog',
# r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180818.newlog',
# r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180817.2.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180818.newlog',
# r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
# Danger: Some files were cached already. Hence some of the test runs are corrupted.
# First run of the last run period file (successful test run):
self.navix_monitor_file_list = [r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827.run1']
# self.navix_monitor_file_list = [r'data/skimming/ekpsg/05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827.run1']
# self.navix_monitor_file_list = [r'http://ekpwww.etp.kit.edu/~cheidecker/Caching-Benchmarks/data/higgs/sg/'
# r'05-tuned-SSDs-Raid0/NaviX.mon.Skimming.180827']
def jec_sg(self):
# -> run 1:
# TODO: Add run description and upload file
self.navix_monitor_file_list = [r'data/jec/sg/NaviX.mon']
def jec_topas(self):
# -> run 1:
# TODO: Add run description and upload file
self.navix_monitor_file_list = [r'data/jec/topas/NaviX_TOPAS_JEC_v1.mon',
r'data/jec/topas/NaviX_TOPAS_JEC_v2.mon']
def jec_nemo(self):
# -> run 1:
# TODO: Add run description and upload file
self.navix_monitor_file_list = [r'data/jec/nemo/NaviX.mon']
......@@ -12,7 +12,9 @@ class Prediction:
self.workflow_rate_max = -999. # Maximum read rate the workflow allows per core
try:
if workflow == 'copy60sg':
if workflow == 'test':
self.test()
elif workflow == 'copy60sg':
self.copy60sg()
elif workflow == 'copy_topas':
self.copy_topas()
......@@ -38,6 +40,16 @@ class Prediction:
color='red')
exit(-1)
def test(self):
# Prediction for test scenario
self.data_total = 1. # Total amount of data accessed by all jobs
self.number_nodes = 1. # Number of HTCondor worker nodes connected to cache
self.number_slots_per_node = 1. # Number of HTCondor slots per worker node
self.remote_rate_total = 1. # Read rate accessing remote storage in MB/s
self.cache_rate_per_node = 1. # Cache read speed in MB/s
self.workflow_rate_max = 1. # Maximum data throughput that the workflow itself provides per core
self.workflow_rate_max = 1. # Maximum data throughput that the workflow itself provides per core
def copy60sg(self):
# Copy jobs with 60 jobs on SG/SM machines:
self.data_total = 3600. * 10. * 60. # about 3.6GB per file for /dev/null tests
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment