'''
Bookkeeping for beam background samples.

A BackgroundData object describes one sample: where its files live (KEK, Gridka,
Ceph at ETP) plus some metadata. The module-level functions look samples up in a
YAML catalogue whose entries live under the top-level key 'available_backgrounds'.
'''
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional
import os
import subprocess
from pathlib import Path

try:
    import yaml
except ImportError:
    # Only the YAML catalogue helpers need PyYAML; listing and transferring files
    # works without it, so a missing package is reported when it is actually needed.
    yaml = None

# Default catalogue path, e.g. "/work/jeppelt/etpackground/beambackgrounds.yaml".
# Read with .get() so that importing the module does not fail when the variable is
# unset; the YAML helpers raise a RuntimeError instead if no path is available.
yaml_default_path = os.environ.get("BEAMBACKGROUND_YAML")  # TODO set this properly

# Entries of the beambackgrounds.yaml shipped alongside this module (all under the
# top-level key 'available_backgrounds'; every entry has kek_paths, gridka_paths and
# ceph_paths set to [] and luminostiy and n_events set to 0):
#
#   key                      experiment  run       bucket
#   early_phase3_release5    ""          ""        ""
#   nominal_phase3_release6  (absent)    (absent)  ""
#   exp22_run30              22          30        28
#   exp22_run468             22          468       29
#   exp24_run985             24          985       30
#   exp24_run2176            24          2176      33
#   exp26_run898             26          898       35
#   exp26_run1485            26          1485      36

# Catalogue keys that are copied verbatim into BackgroundData when present.
_PASSTHROUGH_FIELDS = ("luminostiy", "n_events", "experiment", "run", "bucket", "gridka_server")


def _require_directory(path: Optional[str], site: str) -> None:
    '''
    assert that the storage path of a site is set and is an existing directory
    @param path: directory to check
    @param site: site name used in the error message ('kek' or 'ceph')
    '''
    assert path is not None, f"{site}_paths is None. Please set the paths."
    assert os.path.isdir(path), f"The path '{path}' does not exist or is not a directory."


def _list_directory(path: str) -> List[str]:
    '''
    return '<path>/<filename>' for every entry of a directory, sorted by filename
    '''
    # os.listdir has no defined order; sorting keeps the result reproducible.
    return [f"{path}/{filename}" for filename in sorted(os.listdir(path))]


def _xrootd_copy(filesystem: Any, source: str, target: str, exists_warning: str) -> None:
    '''
    copy one file with an XRootD FileSystem object; an already existing target is only warned about
    @param filesystem: XRootD.client.FileSystem instance
    @param source: source URL or path
    @param target: target URL or path
    @param exists_warning: message printed when the target already exists
    '''
    status, _ = filesystem.copy(source, target)
    if status.ok:
        return
    if "File already exists" in status.message:
        print(exists_warning)
    else:
        raise RuntimeError(status.message)


def _load_yaml_document(yaml_data: Optional[str]) -> Dict[str, Any]:
    '''
    read the YAML catalogue file and return its top-level mapping (empty dict for an empty file)
    @param yaml_data: path to the yaml file
    '''
    if yaml is None:
        raise RuntimeError("PyYAML is required to read or write the beam background yaml file.")
    if yaml_data is None:
        raise RuntimeError("No yaml file given. Pass yaml_data or set BEAMBACKGROUND_YAML.")
    # safe_load parses YAML *text*; handing it the path string would parse the path itself.
    with open(yaml_data, encoding="utf-8") as handle:
        document = yaml.safe_load(handle)
    if document is None:
        return {}
    if not isinstance(document, dict):
        raise RuntimeError(f"The yaml file at {yaml_data} does not contain a mapping at top level.")
    return document


def _load_available_backgrounds(yaml_data: Optional[str]) -> Dict[str, Any]:
    '''
    return the 'available_backgrounds' mapping of the catalogue (empty dict if missing)
    @param yaml_data: path to the yaml file
    '''
    backgrounds = _load_yaml_document(yaml_data).get('available_backgrounds')
    return backgrounds if backgrounds is not None else {}


def _single_path(entry: Dict[str, Any], site: str) -> Optional[str]:
    '''
    extract the storage directory of one site from a catalogue entry
    '<site>_path' (written by update_yaml_entry) wins over '<site>_paths' (used by the shipped catalogue).
    NOTE(review): BackgroundData holds one directory per site, so an empty list/string becomes None and
    a one-element list becomes its element; longer lists are rejected - confirm this is the intended schema.
    @param entry: catalogue entry of one background
    @param site: 'kek', 'gridka' or 'ceph'
    '''
    value = entry.get(f"{site}_path")
    if value is None:
        value = entry.get(f"{site}_paths")
    if isinstance(value, (list, tuple)):
        if len(value) > 1:
            raise RuntimeError(f"'{site}_paths' lists {len(value)} paths, but only a single directory is supported.")
        value = value[0] if value else None
    return value or None


def _background_from_entry(key: str, entry: Optional[Dict[str, Any]]) -> "BackgroundData":
    '''
    build a BackgroundData object from one catalogue entry; keys missing in the entry keep the dataclass defaults
    @param key: key name under which the entry is stored in the yaml file
    @param entry: mapping stored under that key (None is treated as empty)
    '''
    entry = entry or {}
    metadata = {name: entry[name] for name in _PASSTHROUGH_FIELDS if name in entry}
    return BackgroundData(
        key=key,
        kek_path=_single_path(entry, "kek"),
        gridka_path=_single_path(entry, "gridka"),
        ceph_path=_single_path(entry, "ceph"),
        **metadata,
    )


def _select_backgrounds(
    yaml_data: Optional[str],
    matches: Callable[[Dict[str, Any]], bool],
    not_found_message: str,
) -> List["BackgroundData"]:
    '''
    return BackgroundData objects for all catalogue entries accepted by a predicate
    @param yaml_data: path to the yaml file
    @param matches: predicate called with each catalogue entry
    @param not_found_message: message of the RuntimeError raised when nothing matches
    '''
    selected = [
        _background_from_entry(key, entry)
        for key, entry in _load_available_backgrounds(yaml_data).items()
        if matches(entry or {})
    ]
    if not selected:
        raise RuntimeError(not_found_message)
    return selected


@dataclass
class BackgroundData:
    '''
    class to store metadata of a beambackground and provide functions for convenient access
    @param key: key name under which it is stored in the yaml file
    @param kek_path: path to the files on KEK
    @param gridka_path: path to the files on Gridka
    @param ceph_path: path to the files on Ceph
    @param luminostiy: luminosity of the beam background files
    @param n_events: number of events in the beam background files
    @param experiment: rundependent only!, experiment number of the beam background files
    @param run: rundependent only!, run number of the beam background files
    @param bucket: rundependent only!, bucket number of the beam background files for use in globaltags
    @param gridka_server: XRootD server URL used for all Gridka access
    '''
    key: str
    kek_path: Optional[str] = None
    gridka_path: Optional[str] = None
    ceph_path: Optional[str] = None
    luminostiy: int = 0
    n_events: int = 0
    experiment: int = 0
    run: int = 0
    bucket: int = 0
    gridka_server: str = "root://dcachexrootd-kit.gridka.de:1094"

    @property
    def on_kek(self) -> bool:
        return True  # TODO: Implement check if on KEK

    @property
    def kek_file_list(self) -> List[str]:
        '''
        function to get the file list for the kek storage
        raises AssertionError if kek_path is unset or not an existing directory
        '''
        assert self.on_kek, "Executions does not seem to be on KEK. Kek paths are not accessible."
        _require_directory(self.kek_path, "kek")
        return _list_directory(self.kek_path)

    @property
    def on_etp(self) -> bool:
        return True  # TODO: Implement check if on ETP

    @property
    def ceph_file_list(self) -> List[str]:
        '''
        function to get the file list for the ceph storage
        raises AssertionError if ceph_path is unset or not an existing directory
        '''
        assert self.on_etp, "Executions does not seem to be on ETP. Ceph paths are not accessible."
        _require_directory(self.ceph_path, "ceph")
        return _list_directory(self.ceph_path)

    @property
    def has_certificate(self) -> bool:
        # Only checks that the variable is set, not that the proxy is valid.
        return "X509_USER_PROXY" in os.environ  # TODO: Implement check if certificate is available

    @property
    def gridka_file_list(self) -> List[str]:
        '''
        function to get the file list for the gridka storage
        returns the full XRootD URLs ('<gridka_server>/<gridka_path>/<name>') of all '.root' entries
        raises RuntimeError if the directory listing fails
        '''
        assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY."
        assert self.gridka_path is not None, "gridka_paths is None. Please set the paths."
        # Imported lazily so that the module stays usable where the XRootD bindings are not installed.
        from XRootD import client
        from XRootD.client.flags import DirListFlags
        filesystem = client.FileSystem(self.gridka_server)
        status, listing = filesystem.dirlist(self.gridka_path, DirListFlags.STAT, timeout=10)
        if not status.ok:
            raise RuntimeError(status.message)
        if listing is None:
            print("Warning: No files found in directory.")  # TODO use logger or turn to assert?
            return []
        return [
            f"{self.gridka_server}/{self.gridka_path}/{entry.name}"
            for entry in listing
            if entry.name.endswith(".root")
        ]

    def download_from_gridka(self, target_path: str) -> None:
        '''
        function to download the files from Gridka to a target path using XRootD python bindings
        files that already exist locally are skipped with a warning
        @param target_path: path to the target directory
        '''
        remote_files = self.gridka_file_list  # asserts certificate and gridka_path
        from XRootD import client
        filesystem = client.FileSystem(self.gridka_server)
        for file in remote_files:
            # gridka_file_list already returns full URLs, so the server must not be prepended again.
            _xrootd_copy(
                filesystem,
                file,
                f"{target_path}/{Path(file).name}",
                f"Warning: File '{file}' already exists localy. Skipping.",
            )

    def update_yaml_entry(self, yaml_data: str = yaml_default_path) -> str:
        '''
        function to update the yaml entry for the background and save it to the yaml file
        the entry is stored under 'available_backgrounds' -> key and is created if it does not exist yet
        @param yaml_data: path to the yaml file, default is yaml_default_path
        @return: the yaml text that was written to the file
        '''
        # Load the YAML data
        document = _load_yaml_document(yaml_data)
        backgrounds = document.get('available_backgrounds')
        if backgrounds is None:
            backgrounds = document['available_backgrounds'] = {}
        entry = backgrounds.get(self.key)
        if entry is None:
            entry = backgrounds[self.key] = {}

        # Update the specific key's values
        entry.update({
            'kek_path': self.kek_path,
            'gridka_path': self.gridka_path,
            'ceph_path': self.ceph_path,
            'luminostiy': self.luminostiy,
            'n_events': self.n_events,
            'experiment': self.experiment,
            'run': self.run,
            'bucket': self.bucket,
            'gridka_server': self.gridka_server,
        })

        # Dump the updated data; serialise first so a failing dump cannot truncate the file
        text = yaml.dump(document)
        with open(yaml_data, "w", encoding="utf-8") as handle:
            handle.write(text)
        return text

    def send_from_kek_to_ceph(self, ceph_base_path: str, username: str):
        '''
        function to send the files from KEK to Ceph at ETP using scp and the portal1 as entrypoint
        sets ceph_path to ceph_base_path once all files were copied
        raises RuntimeError if scp fails for a file
        @param ceph_base_path: base path on Ceph to copy to.
        @param username: username at ETP to be used for scp
        '''
        for file in self.kek_file_list:  # asserts that kek_path is usable
            destination = f"{username}@portal1.etp.kit.edu:{ceph_base_path}/{Path(file).name}"
            # Argument list instead of a shell string: paths with spaces or shell
            # metacharacters are passed to scp unchanged.
            result = subprocess.run(["scp", file, destination])
            if result.returncode != 0:
                raise RuntimeError(f"scp of '{file}' to '{destination}' failed with exit code {result.returncode}.")
        self.ceph_path = ceph_base_path

    def send_from_kek_to_gridka(self, gridka_base_path: str, username: str):
        '''
        function to send the files from KEK to Gridka using XRootD python bindings
        files that already exist on Gridka are skipped with a warning
        sets gridka_path to gridka_base_path once all files were copied
        @param gridka_base_path: base path on Gridka to copy to.
        @param username: not used by this function; kept so the signature matches send_from_kek_to_ceph
        '''
        local_files = self.kek_file_list  # asserts that kek_path is usable
        assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY."
        from XRootD import client
        filesystem = client.FileSystem(self.gridka_server)
        for file in local_files:
            _xrootd_copy(
                filesystem,
                f"file://{file}",
                f"{self.gridka_server}/{gridka_base_path}/{Path(file).name}",
                f"Warning: File '{file}' already exists on Gridka. Skipping.",
            )
        # Same bookkeeping as send_from_kek_to_ceph does for ceph_path.
        self.gridka_path = gridka_base_path

    def get_file_list(self, key="gridka") -> List[str]:
        '''
        function to get the file list for a specific storage
        raises RuntimeError for an unknown key
        @param key: key for the storage. Possible values are 'kek', 'gridka' and 'ceph', default is 'gridka'
        '''
        if key not in ("kek", "gridka", "ceph"):
            raise RuntimeError(f"Key '{key}' not known. Use 'kek', 'gridka' or 'ceph'.")
        return getattr(self, f"{key}_file_list")


@dataclass
class BeambackgroundList:
    '''
    convenience class to store multiple beambackgrounds and provide functions for convenient access
    @param beambackgrounds: list of beambackgrounds, default is an empty list
    '''
    # Annotated (':' not '=') so that it is a real dataclass field; the default keeps
    # BeambackgroundList() without arguments working.
    beambackgrounds: List[BackgroundData] = field(default_factory=list)

    @property
    def kek_file_list(self) -> List[str]:
        '''
        convenience function to get the file list for the kek storage
        '''
        return self.get_file_list("kek")

    @property
    def gridka_file_list(self) -> List[str]:
        '''
        convenience function to get the file list for the gridka storage
        '''
        return self.get_file_list("gridka")

    @property
    def ceph_file_list(self) -> List[str]:
        '''
        convenience function to get the file list for the ceph storage
        '''
        return self.get_file_list("ceph")

    def get_file_list(self, key="gridka") -> List[str]:
        '''
        convenience function to get the file list for a specific storage
        the lists of the individual beambackgrounds are concatenated in order
        @param key: key for the storage. Possible values are 'kek', 'gridka' and 'ceph', default is 'gridka'
        '''
        file_list: List[str] = []
        for background in self.beambackgrounds:
            file_list.extend(background.get_file_list(key))
        return file_list

    def update_yaml_entries(self, yaml_data: str = yaml_default_path) -> str:
        '''
        convenience function to update the yaml entries for the beambackgrounds and save it to the yaml file
        @param yaml_data: path to the yaml file, default is yaml_default_path
        @return: the yaml text written by the last update, '' if the list is empty
        '''
        text = ""
        for background in self.beambackgrounds:
            text = background.update_yaml_entry(yaml_data)
        return text


def get_beam_background_by_key(key: str, yaml_data: str = yaml_default_path) -> BackgroundData:
    '''
    convenience function to search within a yaml file for a specific key and return the corresponding BackgroundData object
    raises AssertionError if the key is not in the yaml file
    @param key: key name under which it is stored in the yaml file
    @param yaml_data: path to the yaml file, default is yaml_default_path
    '''
    backgrounds_data = _load_available_backgrounds(yaml_data)
    assert key in backgrounds_data, f"Key '{key}' does not exist in yaml file at {yaml_data}. Check the spelling or add the entry."
    return _background_from_entry(key, backgrounds_data[key])


def get_beam_backgrounds_by_run(experiment: int, run: int, yaml_data: str = yaml_default_path) -> List[BackgroundData]:
    '''
    convenience function to search within a yaml file for a specific run and return the corresponding BackgroundData objects
    entries without 'experiment' or 'run' never match; raises RuntimeError if nothing matches
    @param experiment: experiment number of the beam background files
    @param run: run number of the beam background files
    @param yaml_data: path to the yaml file, default is yaml_default_path
    '''
    print("Warning: This function does not check for unique files. If there are multiple entries with the same run and experiment, all of them are returned.")
    return _select_backgrounds(
        yaml_data,
        lambda entry: entry.get('experiment') == experiment and entry.get('run') == run,
        f"No background with experiment '{experiment}' and run '{run}' found in yaml file at {yaml_data}. Check the spelling or add the entry.",
    )


def get_beam_backgrounds_by_experiment(experiment: int, yaml_data: str = yaml_default_path) -> List[BackgroundData]:
    '''
    convenience function to search within a yaml file for a specific experiment and return the corresponding BackgroundData objects
    entries without 'experiment' never match; raises RuntimeError if nothing matches
    @param experiment: experiment number of the beam background files
    @param yaml_data: path to the yaml file, default is yaml_default_path
    '''
    print("Warning: This function does not check for unique files. If there are multiple entries with the same experiment, all of them are returned.")
    return _select_backgrounds(
        yaml_data,
        lambda entry: entry.get('experiment') == experiment,
        f"No background with experiment '{experiment}' found in yaml file at {yaml_data}. Check the spelling or add the entry.",
    )


def get_beam_backgrounds_by_bucket(bucket: int, yaml_data: str = yaml_default_path) -> List[BackgroundData]:
    '''
    convenience function to search within a yaml file for a specific bucket and return the corresponding BackgroundData objects
    entries without 'bucket' never match; raises RuntimeError if nothing matches
    @param bucket: bucket number of the beam background files for use in globaltags
    @param yaml_data: path to the yaml file, default is yaml_default_path
    '''
    print(f"Warning: This function does not check if all runs of a bucket are available. Only beambackgrounds entered in {yaml_data} are considered.")
    return _select_backgrounds(
        yaml_data,
        lambda entry: entry.get('bucket') == bucket,
        f"No background with bucket '{bucket}' found in yaml file at {yaml_data}. Check the spelling or add the entry.",
    )