Commit 10eb88f1 authored by Sebastian Brommer

add nemo2 config

parent e0e2bcc1
# bwForCluster OpenStack Worker Setup
# -----------------------------------
# Cloud attributes
STARTD.CloudSite = "BWFORCLUSTER_NEMO2"
# STARTD_DEBUG = D_ANY
# START_DEBUG = D_ANY
# STARTER_DEBUG = D_FULLDEBUG
# MASTER_DEBUG = D_ANY
# Machine/Job environment
# Use TardisDroneUuid to identify drones in OBS
# TardisDroneUuid = "$ENV(TardisDroneUuid)"
# STARTD_NAME = $ENV(TardisDroneUuid)
# Matchmaking advertisement
STARTD.ProvidesIO = TRUE
STARTD.ProvidesCpu = TRUE
# Restrict authentication to password and token based methods
SEC_DEFAULT_AUTHENTICATION_METHODS = PASSWORD, IDTOKENS, SCITOKENS
SEC_TOKEN_DIRECTORY = $(ETC)/tokens.d/
SEC_TOKEN_SYSTEM_DIRECTORY = $(ETC)/tokens.d/
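# A minimal sketch of provisioning a token for the pool identities used
# below (illustrative; the actual token distribution may differ):
#   condor_token_create -identity condor_pool@login2.nemo.privat \
#       > /etc/condor/tokens.d/condor_pool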
DomainNemo = login2.nemo.privat
ALLOW_DAEMON = $(ALLOW_DAEMON), condor@$(DomainNemo), condor_pool@$(DomainNemo)
ALLOW_WRITE = $(ALLOW_WRITE), condor@$(DomainNemo), condor_pool@$(DomainNemo)
# Network settings
# PRIVATE_NETWORK_NAME = cloud.ruf.uni-freiburg.de
DEFAULT_DOMAIN_NAME = $(DomainNemo)
UID_DOMAIN = $(DomainNemo)
# localhost's condor is admin
ALLOW_ADMINISTRATOR = $(ALLOW_ADMINISTRATOR), \
condor@$(UID_DOMAIN)/$(IP_ADDRESS)
# The firewall may kill connections that are inactive for too long
TCP_KEEPALIVE_INTERVAL = 4 * $(MINUTE)
# Wall-time
WalltimeDefault = 86400
# Read/set MachineMaxWalltime & MachineStarttime (set by cloud-init)
include command into $(ETC)/walltime.cfg : python3 $(ETC)/meta_walltime_moab.py
MachineWalltime = $(MachineMaxWalltime:$(WalltimeDefault))
MaxShutdownTime = $(MachineStarttime) + $(MachineWalltime)
STARTD.MaxJobRetirementTime = $(MaxShutdownTime) - $(JobStart:0) - 3 * $(MINUTE)
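# Illustrative example of what the include is expected to emit (values are
# assumptions; the real ones are set by cloud-init via meta_walltime_moab.py):
#   MachineMaxWalltime = 86400
#   MachineStarttime = 1700000000
# A job started at JobStart = 1700010000 would then get
#   MaxJobRetirementTime = (1700000000 + 86400) - 1700010000 - 180 = 76220 s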
# Auto Shutdown
# Shutting down MASTER will trigger shutdown to all other daemons
# Early "nice" shutdown 10 Minutes before limit is reached
MASTER.DAEMON_SHUTDOWN = ( TimeToLive <= 10 * $(MINUTE) )
# Fast shutdown, 3 minutes before wall-time limit is reached
MASTER.DAEMON_SHUTDOWN_FAST = ( TimeToLive <= 3 * $(MINUTE) )
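# TimeToLive is not set in this file; it is assumed to come from an included
# config (0085.auto_shutdown.cfg in the site list below), for example as the
# remaining machine lifetime:
#   TimeToLive = ( $(MaxShutdownTime) - time() )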
# Set number of CPUs
NUM_CPUS = 1
# Machine/Job environment
# Read /etc/environment, for example for http proxy
STARTD.JOB_INHERITS_STARTER_ENVIRONMENT = TRUE
include command into $(ETC)/environment.cfg : $(ETC)/get_machine_env.sh
# Inverse wall-time job ranking
# - prefer long running jobs (shorter jobs can back-fill)
###
# -> Scale by ( wall-time / remaining machine lifetime )
# Jobs with RequestWalltime > TimeToLive are cut off by (START = FALSE),
# so the rank can safely be set to the maximum in that case to prevent preemption
###
RankJobWtime = \
ifThenElse( isUndefined(TARGET.RequestWalltime), \
0, \
ifThenElse( TARGET.RequestWalltime > $(TimeToLive), \
$(RankJobWtimeMax), \
$(RankJobWtimeMax) * ( TARGET.RequestWalltime / $(TimeToLive) ) \
) \
)
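# Worked example, assuming an illustrative RankJobWtimeMax = 1000,
# TimeToLive = 86400 s of remaining lifetime, and real-valued division
# (ClassAd integer/integer division truncates, so one operand should
# evaluate to a real for the scaling to work):
#   RequestWalltime = 21600 -> rank 250
#   RequestWalltime = 43200 -> rank 500
#   RequestWalltime >= 86400 -> rank capped at 1000 (and rejected by the
#   START cutoff below)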
# Average CPU utilization
STARTD_PARTITIONABLE_SLOT_ATTRS = $(STARTD_PARTITIONABLE_SLOT_ATTRS), CPUsUsage
AverageCPUsUsage = Sum(My.ChildCPUsUsage)/Sum(My.ChildCPUs)
STARTD_ATTRS = $(STARTD_ATTRS), AverageCPUsUsage
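# Example: two dynamic slots with ChildCPUsUsage = { 0.9, 0.3 } and
# ChildCPUs = { 1, 1 } yield AverageCPUsUsage = (0.9 + 0.3) / (1 + 1) = 0.6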
# Check if filesystems are mounted
STARTD_CRON_JOBLIST = NODEHEALTH
STARTD_CRON_NODEHEALTH_EXECUTABLE = $(ETC)/git/scripts/healthcheck_cvmfs.sh
STARTD_CRON_NODEHEALTH_MODE = Periodic
STARTD_CRON_NODEHEALTH_PERIOD = 600s
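# The hook executable is expected to print ClassAd attributes to stdout,
# one per line, ending each block with a single "-", e.g. (illustrative
# attribute name; the real one is defined by healthcheck_cvmfs.sh):
#   NodeHealthCvmfsOk = true
#   -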
SINGULARITY_JOB = TRUE
SINGULARITY = /usr/bin/apptainer
SINGULARITY_HOOK_PREPARE_JOB = $(ETC)/git/scripts/prepare_singularity.py
SINGULARITY_IMAGE_EXPR = IfThenElse(TARGET.ContainerImage=?=undefined, "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/mschnepf/slc7-condocker:latest", TARGET.ContainerImage)
SINGULARITY_RUN_TEST_BEFORE_JOB = FALSE
SINGULARITY_TARGET_DIR = /srv
MOUNT_UNDER_SCRATCH =
# The container needs to use the execute directory as HOME directory
SINGULARITY_HOME = /srv
SINGULARITY_EXTRA_ARGUMENTS = "--userns --env SINGULARITY_BIND= --env APPTAINER_BIND= --env APPTAINER_BINDPATH="
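# Jobs may select their own image via the ContainerImage job attribute;
# a hedged submit-file sketch (hypothetical image path), otherwise the
# slc7 default above applies:
#   +ContainerImage = "/cvmfs/unpacked.cern.ch/registry.hub.docker.com/someuser/someimage:latest"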
# Only accept remote jobs from sbrommer that will finish in time
START = $(START) && \
( TARGET.RemoteJob =?= TRUE ) && \
( TARGET.Owner == "sbrommer" ) && \
( $(TimeToLive) > TARGET.RequestWalltime )
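# Example: with 86400 s of TimeToLive left, a RemoteJob owned by "sbrommer"
# requesting 7200 s matches; a job requesting 90000 s does not start here.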
@@ -42,6 +42,49 @@
}
]
},
"BwForClusterNemo2": {
"config": [
"0001.constants.cfg",
"0002.linux.cfg",
"0010.node_base.cfg",
"0012.node_slave.cfg",
"0015.logging.cfg",
"0021.security.cfg",
"0060.node_worker.cfg",
"0065.node_worker_remote.cfg",
"0082.slot_users.cfg",
"0083.partitionable_slots.cfg",
"0085.auto_shutdown.cfg",
"0088.ssh_to_job.cfg",
"0212.node_worker_bwforcluster_nemo2.cfg"
],
"scripts": [
{
"destination": "/etc/condor/",
"name": "config_pull.py"
},
{
"destination": "/etc/condor/",
"name": "meta_walltime_moab.py"
},
{
"destination": "/etc/condor/",
"name": "get_machine_env.sh"
},
{
"destination": "/etc/condor/",
"name": "collector_address.py"
},
{
"destination": "/etc/condor/",
"name": "config_sites.json"
},
{
"destination": "/etc/condor/",
"name": "shutdown.sh"
}
]
},
"Collector": {
"config": [
"0001.constants.cfg",
......