Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
F
fastNLO
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container registry
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
qcd-public
fastNLO
Commits
80e8ee4f
Commit
80e8ee4f
authored
4 years ago
by
Klaus Rabbertz
Browse files
Options
Downloads
Patches
Plain Diff
Work on event rate summary plot
parent
a026f022
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
tools/plotting/fastnnlo_runtime.py
+142
-75
142 additions, 75 deletions
tools/plotting/fastnnlo_runtime.py
with
142 additions
and
75 deletions
tools/plotting/fastnnlo_runtime.py
+
142
−
75
View file @
80e8ee4f
...
...
@@ -4,6 +4,8 @@
import
glob
import
argparse
import
glob
import
os
import
re
import
sys
import
matplotlib
as
mpl
import
matplotlib.gridspec
as
gridspec
...
...
@@ -51,6 +53,14 @@ import matplotlib.pyplot as plt
# numpy
import
numpy
as
np
# Redefine ScalarFormatter
class
ScalarFormatterForceFormat
(
ScalarFormatter
):
# Override function that finds format to use.
def
_set_format
(
self
,
vmin
,
vmax
):
self
.
format
=
"
%1.2f
"
# Give format here
# Action class to allow comma-separated (or empty) list in options
class
SplitArgs
(
argparse
.
Action
):
def
__call__
(
self
,
parser
,
namespace
,
values
,
option_string
=
None
):
...
...
@@ -62,6 +72,9 @@ class SplitArgs(argparse.Action):
# Some global definitions
_debug
=
False
_formats
=
{
'
eps
'
:
0
,
'
pdf
'
:
1
,
'
png
'
:
2
,
'
svg
'
:
3
}
_channels
=
[
'
LO
'
,
'
R
'
,
'
V
'
,
'
RRa
'
,
'
RRb
'
,
'
RV
'
,
'
VV
'
,
'
ALL
'
]
_channel_number
=
{
'
LO
'
:
0
,
'
R
'
:
1
,
'
V
'
:
2
,
'
RRa
'
:
3
,
'
RRb
'
:
4
,
'
RV
'
:
5
,
'
VV
'
:
6
}
_channel_colors
=
[
'
tab:green
'
,
'
tab:cyan
'
,
'
tab:blue
'
,
'
tab:red
'
,
'
tab:orange
'
,
'
tab:pink
'
,
'
tab:purple
'
]
#####################################################################################
...
...
@@ -77,7 +90,16 @@ def main():
args
=
arguments
()
# extract correct paths for input and outputfiles
logfiles
=
get_files
(
args
[
'
logfiles
'
])
files
=
get_files
(
args
[
'
logfiles
'
])
logfiles
=
[]
runfiles
=
[]
for
file
in
files
:
runfile
=
re
.
sub
(
'
.log$
'
,
'
.run
'
,
file
)
if
not
os
.
path
.
isfile
(
runfile
):
print
(
'
[fastnnlo_runtime]: WARNING! No matching runcard found for log file {}, skipped!
'
)
else
:
logfiles
.
append
(
file
)
runfiles
.
append
(
runfile
)
outputpath
=
args
[
'
output
'
]
print
(
'
[fastnnlo_runtime]: Output path argument is: {}
'
.
format
(
outputpath
))
outputname
=
args
[
'
filename
'
]
...
...
@@ -100,15 +122,17 @@ def main():
# get all the information from logfiles as dict
# dict contains: runtime, runtime_unit, channel, events
loginformation
=
get_loginformation
(
logfiles
)
runinformation
=
get_runinformation
(
runfiles
)
info
=
{
**
loginformation
,
**
runinformation
}
# plot all the information
if
args
[
'
CPUtime
'
]:
plot_elapsed_time
(
log
info
rmation
,
outputpath
,
outputname
,
formats
)
plot_elapsed_time
(
info
,
outputpath
,
outputname
,
formats
)
if
args
[
'
Events
'
]:
plot_events_per_hour
(
log
info
rmation
,
outputpath
,
outputname
,
formats
)
plot_events_per_hour
(
info
,
outputpath
,
outputname
,
formats
)
if
not
args
[
'
CPUtime
'
]
and
not
args
[
'
Events
'
]:
plot_elapsed_time
(
log
info
rmation
,
outputpath
,
outputname
,
formats
)
plot_events_per_hour
(
log
info
rmation
,
outputpath
,
outputname
,
formats
)
plot_elapsed_time
(
info
,
outputpath
,
outputname
,
formats
)
plot_events_per_hour
(
info
,
outputpath
,
outputname
,
formats
)
exit
(
0
)
...
...
@@ -143,17 +167,16 @@ def get_files(files):
print
(
'
fastnnlo_runtime: ERROR! Aborted, only one log file found: {}
'
.
format
(
files
[
0
]))
exit
(
3
)
# sort unsorted glob list
files
.
sort
()
return
files
def
get_loginformation
(
files
):
run_time
=
[]
number_events
=
[]
channel
=
None
runtimes
=
[]
for
file
in
files
:
event
=
False
run_time_temp
=
[]
runtimes_temp
=
[]
with
open
(
file
)
as
origin
:
for
line
in
origin
:
...
...
@@ -165,138 +188,182 @@ def get_loginformation(files):
seconds
=
float
(
line
[
3
])
if
hours
!=
0.
:
run
_
time_temp
.
append
(
hours
+
minutes
/
60
+
seconds
/
360
)
runtime
s
_temp
.
append
(
hours
+
minutes
/
60
+
seconds
/
360
)
unit
=
'
hours
'
else
:
run
_
time_temp
.
append
(
minutes
+
seconds
/
60
)
runtime
s
_temp
.
append
(
minutes
+
seconds
/
60
)
unit
=
'
minutes
'
# extract channel name
if
'
Tablename
'
in
line
and
not
channel
:
line
=
line
.
split
()
tablename
=
line
[
2
].
split
(
'
.
'
)
channel
=
tablename
[
0
]
+
'
.
'
+
tablename
[
1
]
# extract total events
if
'
ncalltot=
'
in
line
and
not
event
:
line
=
line
.
split
(
'
,
'
)
number_events
.
append
(
float
(
line
[
4
][
10
:]))
event
=
True
runtimes
.
append
(
runtimes_temp
[
-
1
])
runtimes
=
np
.
array
(
runtimes
)
run_time
.
append
(
run_time_temp
[
-
1
])
information
=
{
'
runtime
'
:
runtimes
,
'
runtime_unit
'
:
unit
}
return
information
def
get_runinformation
(
files
):
nevents
=
[]
channels
=
[]
for
file
in
files
:
with
open
(
file
)
as
origin
:
for
line
in
origin
:
# extract total events
if
'
Number of events
'
in
line
:
line
=
line
.
split
()
nevents
.
append
(
line
[
0
])
if
'
Job name id
'
in
line
:
line
=
line
.
split
()
parts
=
line
[
0
].
split
(
'
-
'
)
channels
.
append
(
parts
[
0
])
run_time
=
np
.
array
(
run_time
)
number_event
s
=
np
.
array
(
number_event
s
)
nevents
=
np
.
array
(
nevents
)
channel
s
=
np
.
array
(
channel
s
)
information
=
{
'
runtime
'
:
run_time
,
'
runtime_unit
'
:
unit
,
'
channel
'
:
channel
,
'
events
'
:
number_events
'
events
'
:
nevents
,
'
channels
'
:
channels
}
return
information
def
plot_elapsed_time
(
info
rmation
dict
,
out_path
,
out_name
,
formats
):
def
plot_elapsed_time
(
infodict
,
out_path
,
out_name
,
formats
):
time
=
informationdict
[
'
runtime
'
]
unit
=
informationdict
[
'
runtime_unit
'
]
channel
=
informationdict
[
'
channel
'
]
basename
=
'
runtime
'
times
=
infodict
[
'
runtime
'
]
unit
=
infodict
[
'
runtime_unit
'
]
channels
=
infodict
[
'
channels
'
]
# get relevant values
mean
=
np
.
mean
(
time
)
std
=
np
.
std
(
time
)
median
=
np
.
median
(
time
)
iqd
=
np
.
subtract
(
*
np
.
percentile
(
time
,
[
75
,
25
],
interpolation
=
'
linear
'
))
/
2.
mean
=
np
.
mean
(
time
s
)
std
=
np
.
std
(
time
s
)
median
=
np
.
median
(
time
s
)
iqd
=
np
.
subtract
(
*
np
.
percentile
(
time
s
,
[
75
,
25
],
interpolation
=
'
linear
'
))
/
2.
CPUtime
=
np
.
sum
(
time
)
/
(
1
if
unit
==
'
hours
'
else
60
)
CPUtime
=
np
.
sum
(
time
s
)
/
(
1
if
unit
==
'
hours
'
else
60
)
# set figure
fig
=
plt
.
figure
(
figsize
=
(
16
,
12
))
ax
=
fig
.
gca
()
# plot histogram
n
,
batches
,
_
=
ax
.
hist
(
time
,
bins
=
20
,
color
=
'
deepskyblue
'
,
edgecolor
=
'
black
'
,
label
=
'
Total CPU time: {0:0.0f} hours
'
.
format
(
CPUtime
))
n
,
batches
,
_
=
ax
.
hist
(
time
s
,
bins
=
20
,
color
=
'
deepskyblue
'
,
edgecolor
=
'
black
'
,
label
=
'
Total CPU time: {0:0.0f} hours
'
.
format
(
CPUtime
))
# plot mean and median
ax
.
vlines
(
mean
,
0
,
max
(
n
),
colors
=
'
red
'
,
linestyles
=
'
dashed
'
,
label
=
r
'
Mean: {0:0.1f}$\pm${2:0.1f} {1}
'
.
format
(
mean
,
unit
,
std
))
ax
.
vlines
(
median
,
0
,
max
(
n
),
colors
=
'
green
'
,
linestyles
=
'
dash
e
d
'
,
label
=
r
'
Median: {0:0.1f}$\pm${2:0.1f} {1}
'
.
format
(
median
,
unit
,
iqd
))
ax
.
vlines
(
median
,
0
,
max
(
n
),
colors
=
'
green
'
,
linestyles
=
'
dashd
ot
'
,
label
=
r
'
Median: {0:0.1f}$\pm${2:0.1f} {1}
'
.
format
(
median
,
unit
,
iqd
))
# finish and save figure
chnlabel
=
channel
chnlabel
=
channel
s
[
0
]
if
out_name
:
chnlabel
=
out_name
ax
.
set_title
(
'
Elapsed time of
'
+
chnlabel
+
'
production
'
,
fontsize
=
20
)
ax
.
set_xlabel
(
'
CPU time [
'
+
unit
+
'
]
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
)
ax
.
set_ylabel
(
'
frequency
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
)
ax
.
set_ylabel
(
'
# jobs
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
,
labelpad
=
20
)
ax
.
set_yscale
(
'
log
'
)
ax
.
tick_params
(
axis
=
'
both
'
,
which
=
'
major
'
,
labelsize
=
20
)
ax
.
ticklabel_format
(
axis
=
'
x
'
,
style
=
'
plain
'
,
useOffset
=
False
)
ax
.
legend
(
loc
=
'
best
'
,
fontsize
=
20
)
ax
.
grid
()
ax
.
set_axisbelow
(
True
)
# set saving location
basename
=
'
runtime
'
for
fmt
in
formats
:
filename
=
out_path
+
(
''
if
out_path
[
-
1
]
==
'
/
'
else
'
/
'
)
if
out_name
:
filename
+=
out_name
+
'
.
'
+
basename
+
'
.
'
+
fmt
else
:
filename
+=
channel
+
'
.
'
+
basename
+
'
.
'
+
fmt
filename
+=
channel
s
[
0
]
+
'
.
'
+
basename
+
'
.
'
+
fmt
print
(
'
[fastnnlo_runtime]: Saving runtime plot {}
'
.
format
(
filename
))
fig
.
savefig
(
filename
)
def
plot_events_per_hour
(
informationdict
,
out_path
,
out_name
,
formats
):
time
=
informationdict
[
'
runtime
'
]
unit
=
informationdict
[
'
runtime_unit
'
]
channel
=
informationdict
[
'
channel
'
]
events
=
informationdict
[
'
events
'
]
basename
=
'
evtrate
'
if
unit
==
'
hours
'
:
eph
=
events
/
time
else
:
eph
=
events
/
(
time
/
60
)
def
plot_events_per_hour
(
infodict
,
out_path
,
out_name
,
formats
):
# get input
channels
=
infodict
[
'
channels
'
]
events
=
infodict
[
'
events
'
]
times
=
infodict
[
'
runtime
'
]
unit
=
infodict
[
'
runtime_unit
'
]
# prepare input
unique_channels
=
set
(
channels
)
if
len
(
unique_channels
)
==
0
:
print
(
'
fastnnlo_runtime: ERROR! Aborted, no channel info found.
'
)
exit
(
11
)
eph
=
[]
for
i
,
time
in
enumerate
(
times
):
if
unit
==
'
hours
'
:
eph
.
append
(
float
(
events
[
i
])
/
time
)
else
:
eph
.
append
(
float
(
events
[
i
])
/
(
time
/
60
))
ephchn
=
[]
for
i
,
val
in
enumerate
(
eph
):
for
j
,
chn
in
enumerate
(
_channels
):
if
channels
[
i
]
==
chn
:
ephchn
.
append
([
val
,
j
])
ephchn
=
np
.
array
(
ephchn
)
masks
=
[]
for
i
,
chn
in
enumerate
(
_channels
):
masks
.
append
(
ephchn
[:,
1
]
==
i
)
# get relevant values
mean
=
np
.
mean
(
eph
)
std
=
np
.
std
(
eph
)
median
=
np
.
median
(
eph
)
iqd
=
np
.
subtract
(
*
np
.
percentile
(
eph
,
[
75
,
25
],
interpolation
=
'
linear
'
))
/
2.
CPUtime
=
np
.
sum
(
time
)
/
(
1
if
unit
==
'
hours
'
else
60
)
# set figure
mean
=
np
.
mean
(
eph
)
std
=
np
.
std
(
eph
)
median
=
np
.
median
(
eph
)
iqd
=
np
.
subtract
(
*
np
.
percentile
(
eph
,
[
75
,
25
],
interpolation
=
'
linear
'
))
/
2.
ephmin
=
np
.
min
(
eph
)
ephmax
=
np
.
max
(
eph
)
logbins
=
np
.
geomspace
(
ephmin
,
ephmax
,
100
)
CPUtime
=
np
.
sum
(
times
)
/
(
1
if
unit
==
'
hours
'
else
60
)
# create figure
fig
=
plt
.
figure
(
figsize
=
(
16
,
12
))
ax
=
fig
.
gca
()
# plot histogram
n
,
batches
,
_
=
ax
.
hist
(
eph
,
bins
=
20
,
color
=
'
deepskyblue
'
,
edgecolor
=
'
black
'
,
label
=
'
Total CPU time: {0:0.0f} hours
'
.
format
(
CPUtime
))
# plot mean and median
ax
.
vlines
(
mean
,
0
,
max
(
n
),
colors
=
'
red
'
,
linestyles
=
'
dashed
'
,
label
=
r
'
Mean: {0:0.1e}$\pm${1:0.1e} events/hour
'
.
format
(
mean
,
std
))
ax
.
vlines
(
median
,
0
,
max
(
n
),
colors
=
'
green
'
,
linestyles
=
'
dashed
'
,
label
=
r
'
Median: {0:0.2e}$\pm${1:0.2e} events/hour
'
.
format
(
median
,
iqd
))
# plot (multistack-)histogram
evrs
=
[]
lastch
=
'
LO
'
for
chn
in
_channels
:
if
chn
in
unique_channels
:
evrs
.
append
(
ephchn
[
masks
[
_channel_number
[
chn
]]][:,
0
])
lastch
=
chn
if
len
(
unique_channels
)
==
1
:
n
,
batches
,
_
=
ax
.
hist
(
evrs
,
log
=
True
,
bins
=
20
,
edgecolor
=
'
black
'
,
color
=
_channel_colors
[
_channel_number
[
lastch
]],
label
=
'
Total CPU time: {0:0.0f} hours
'
.
format
(
CPUtime
))
# plot mean and median
ax
.
vlines
(
mean
,
0
,
max
(
n
),
colors
=
'
red
'
,
linestyles
=
'
dashed
'
,
label
=
r
'
Mean: {0:0.1e}$\pm${1:0.1e} events/hour
'
.
format
(
mean
,
std
))
ax
.
vlines
(
median
,
0
,
max
(
n
),
colors
=
'
green
'
,
linestyles
=
'
dashdot
'
,
label
=
r
'
Median: {0:0.2e}$\pm${1:0.2e} events/hour
'
.
format
(
median
,
iqd
))
ax
.
ticklabel_format
(
axis
=
'
x
'
,
style
=
'
plain
'
,
useOffset
=
False
)
else
:
n
,
batches
,
_
=
ax
.
hist
(
evrs
,
histtype
=
'
barstacked
'
,
log
=
True
,
stacked
=
True
,
bins
=
logbins
,
edgecolor
=
'
black
'
,
color
=
_channel_colors
,
label
=
_channels
)
ax
.
set_xlim
(
0.9
*
ephmin
,
1.1
*
ephmax
)
ax
.
set_xscale
(
'
log
'
)
# finish and save figure
ax
.
set_title
(
'
Event rate of
'
+
channel
+
'
production
'
,
fontsize
=
20
)
chnlabel
=
channels
[
0
]
if
out_name
:
chnlabel
=
out_name
ax
.
set_title
(
'
Event rate of
'
+
chnlabel
+
'
production
'
,
fontsize
=
20
)
ax
.
set_xlabel
(
'
event rate [1/hour]
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
)
ax
.
set_ylabel
(
'
frequency
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
)
ax
.
set_yscale
(
'
log
'
)
ax
.
set_ylabel
(
'
# jobs
'
,
horizontalalignment
=
'
right
'
,
x
=
1.0
,
verticalalignment
=
'
top
'
,
y
=
1.0
,
fontsize
=
20
,
labelpad
=
20
)
ax
.
tick_params
(
axis
=
'
both
'
,
which
=
'
major
'
,
labelsize
=
20
)
ax
.
legend
(
loc
=
'
best
'
,
fontsize
=
20
)
ax
.
grid
()
ax
.
set_axisbelow
(
True
)
# set saving location
basename
=
'
evtrate
'
for
fmt
in
formats
:
filename
=
out_path
+
(
''
if
out_path
[
-
1
]
==
'
/
'
else
'
/
'
)
if
out_name
:
filename
+=
out_name
+
'
.
'
+
basename
+
'
.
'
+
fmt
else
:
filename
+=
channel
+
'
.
'
+
basename
+
'
.
'
+
fmt
filename
+=
channel
s
[
0
]
+
'
.
'
+
basename
+
'
.
'
+
fmt
print
(
'
[fastnnlo_runtime]: Saving event rate plot {}
'
.
format
(
filename
))
fig
.
savefig
(
filename
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment