diff --git a/bb2etp/__init__.py b/bb2etp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a8ce586c71e9e694edeb9e547d6c6408c0e4ed8a --- /dev/null +++ b/bb2etp/__init__.py @@ -0,0 +1 @@ +from .core import * \ No newline at end of file diff --git a/bb2etp.py b/bb2etp/core.py similarity index 55% rename from bb2etp.py rename to bb2etp/core.py index 2c6781aa8f1bb18fc540ebe57e31fed0b24065d5..a2bba82900cd9a1cd3e9168efd48aab3cd92e8bf 100644 --- a/bb2etp.py +++ b/bb2etp/core.py @@ -1,15 +1,21 @@ from dataclasses import dataclass, field from typing import Any, List, Optional, Union -#import yaml + +# import yaml import json import os from pathlib import Path -json_default_path = os.environ["BEAMBACKGROUND_JSON"] if "BEAMBACKGROUND_JSON" in os.environ.keys() else "beambackgrounds.json" +json_default_path = ( + os.environ["BEAMBACKGROUND_JSON"] + if "BEAMBACKGROUND_JSON" in os.environ.keys() + else "beambackgrounds.json" +) + @dataclass class BeambackgroundMetadata: - ''' + """ class to store metadata of a beambackground and provide functions for convenient access @param key: key name under which it is stored in the json file @param type: type of the beam background. Possible values are 'run-dependent' and 'run-independent' @@ -24,12 +30,33 @@ class BeambackgroundMetadata: @param bucket: rundependent only!, bucket number of the beam background files for use in globaltags @param gridka_server: rundependent only!, server to use for the gridka paths @param global_tags: A list of global Tags associated with this beam background - ''' + """ + key: str - type:str= field(metadata={"validate": lambda t: t in ["run-dependent", "run-independent"]}) - kek_path: Union[List[str], str] = field(metadata={"validate": lambda p: isinstance(p, str) or isinstance(p, list) or (p is None)}) - gridka_path: Union[List[str], str] = field(metadata={"validate": lambda p: isinstance(p, str) or isinstance(p, list) or (p is None)}) - ceph_path: Union[List[str], str] = field(metadata={"validate": lambda p: isinstance(p, str) or isinstance(p, list) or (p is None)}) + type: str = field( + metadata={"validate": lambda t: t in ["run-dependent", "run-independent"]} + ) + kek_path: Union[List[str], str] = field( + metadata={ + "validate": lambda p: isinstance(p, str) + or isinstance(p, list) + or (p is None) + } + ) + gridka_path: Union[List[str], str] = field( + metadata={ + "validate": lambda p: isinstance(p, str) + or isinstance(p, list) + or (p is None) + } + ) + ceph_path: Union[List[str], str] = field( + metadata={ + "validate": lambda p: isinstance(p, str) + or isinstance(p, list) + or (p is None) + } + ) luminosity: float = 0 peak_luminosity: float = 0 n_events: int = 0 @@ -46,92 +73,123 @@ class BeambackgroundMetadata: @property def expList(self) -> List[int]: - ''' + """ convenience function to get the experiment numbers for the beambackgrounds - ''' + """ return [self.experiment] def __post_init__(self): - ''' + """ after the init, turn all 'None' strings into None - ''' + """ for field_name in self.__dataclass_fields__: field_value = getattr(self, field_name) - if field_value == 'None': + if field_value == "None": setattr(self, field_name, None) @property def on_kek(self) -> bool: - return "kek.jp" in os.environ['HOSTNAME'] - + return "kek.jp" in os.environ["HOSTNAME"] + @property def kek_file_list(self) -> List[str]: - ''' + """ function to get the file list for the kek storage - ''' - assert self.on_kek, "Executions does not seem to be on KEK. Kek paths are not accessible." + """ + assert ( + self.on_kek + ), "Executions does not seem to be on KEK. Kek paths are not accessible." if isinstance(self.kek_path, str): - assert (os.path.exists(self.kek_path) and os.path.isdir(self.kek_path)), f"The path '{self.kek_path}' does not exist or is not a directory." - return [f"{self.kek_path}/{filename}" for filename in os.listdir(self.kek_path)] + assert os.path.exists(self.kek_path) and os.path.isdir( + self.kek_path + ), f"The path '{self.kek_path}' does not exist or is not a directory." + return [ + f"{self.kek_path}/{filename}" for filename in os.listdir(self.kek_path) + ] else: return self.kek_path - + @property def on_etp(self) -> bool: - return f"/home/{os.environ['USER']}" == os.environ['HOME'] #probably not the best way to check this + return ( + f"/home/{os.environ['USER']}" == os.environ["HOME"] + ) # probably not the best way to check this @property def ceph_file_list(self) -> List[str]: - ''' + """ function to get the file list for the ceph storage - ''' - assert self.on_etp, "Executions does not seem to be on ETP. Ceph paths are not accessible." + """ + assert ( + self.on_etp + ), "Executions does not seem to be on ETP. Ceph paths are not accessible." if isinstance(self.ceph_path, str): - assert (os.path.exists(self.ceph_path) and os.path.isdir(self.ceph_path)), f"The path '{self.ceph_path}' does not exist or is not a directory." - return [f"{self.ceph_path}/{filename}" for filename in os.listdir(self.ceph_path)] + assert os.path.exists(self.ceph_path) and os.path.isdir( + self.ceph_path + ), f"The path '{self.ceph_path}' does not exist or is not a directory." + return [ + f"{self.ceph_path}/{filename}" + for filename in os.listdir(self.ceph_path) + ] else: return self.ceph_path - + @property def has_certificate(self) -> bool: - return "X509_USER_PROXY" in os.environ.keys() #TODO: Implement check if certificate is available + return ( + "X509_USER_PROXY" in os.environ.keys() + ) # TODO: Implement check if certificate is available @property def gridka_file_list(self) -> List[str]: - ''' + """ function to get the file list for the gridka storage - ''' + """ assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY." - assert self.gridka_path is not None, "gridka_paths is None. Please set the paths." + assert ( + self.gridka_path is not None + ), "gridka_paths is None. Please set the paths." if isinstance(self.gridka_path, str): from XRootD import client from XRootD.client.flags import DirListFlags + client = client.FileSystem(self.gridka_server) file_list = [] - status, listing = client.dirlist(self.gridka_path, DirListFlags.STAT, timeout=10) + status, listing = client.dirlist( + self.gridka_path, DirListFlags.STAT, timeout=10 + ) if not status.ok: raise RuntimeError(status.message) if listing is None: - print("Warning: No files found in directory.") # TODO use logger or turn to assert? + print( + "Warning: No files found in directory." + ) # TODO use logger or turn to assert? return file_list for entry in listing: if entry.name.endswith(".root") and not entry.name == "": - file_list.append(f"{self.gridka_server}/{self.gridka_path}/{entry.name}") + file_list.append( + f"{self.gridka_server}/{self.gridka_path}/{entry.name}" + ) return file_list else: return self.gridka_path def download_from_gridka(self, target_path: str) -> None: - ''' + """ function to download the files from Gridka to a target path using XRootD python bindings @param target_path: path to the target directory - ''' + """ assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY." - assert self.gridka_path is not None, "gridka_paths is None. Please set the paths." + assert ( + self.gridka_path is not None + ), "gridka_paths is None. Please set the paths." from XRootD import client + client = client.FileSystem(self.gridka_server) for file in self.gridka_file_list: - status, _ = client.copy(f"{self.gridka_server}/{file}", f"{target_path}/{Path(file).name}") + status, _ = client.copy( + f"{self.gridka_server}/{file}", f"{target_path}/{Path(file).name}" + ) if not status.ok: if "File already exists" in status.message: print(f"Warning: File '{file}' already exists localy. Skipping.") @@ -139,61 +197,74 @@ class BeambackgroundMetadata: raise RuntimeError(status.message) def update_json_entry(self, json_data: str = json_default_path) -> str: - ''' + """ function to update the json entry for the background and save it to the json file @param json_data: path to the json file, default is json_default_path - ''' + """ # Load the json data - with open(json_data, 'r') as file: + with open(json_data, "r") as file: data = json.load(file) if self.key not in data.keys(): data[self.key] = {} # Update the specific key's values - data[self.key]['type'] = self.type - data[self.key]['kek_path'] = self.kek_path - data[self.key]['gridka_path'] = self.gridka_path - data[self.key]['ceph_path'] = self.ceph_path - data[self.key]['luminostiy'] = self.luminosity - data[self.key]['n_events'] = self.n_events - data[self.key]['experiment'] = self.experiment - data[self.key]['run'] = self.run - data[self.key]['bucket'] = self.bucket - data[self.key]['date'] = self.date - data[self.key]['beam_energy'] = self.beam_energy - data[self.key]['qualitiy'] = self.qualitiy - data[self.key]['gridka_server'] = self.gridka_server - data[self.key]['global_tags'] = self.global_tags + data[self.key]["type"] = self.type + data[self.key]["kek_path"] = self.kek_path + data[self.key]["gridka_path"] = self.gridka_path + data[self.key]["ceph_path"] = self.ceph_path + data[self.key]["luminostiy"] = self.luminosity + data[self.key]["n_events"] = self.n_events + data[self.key]["experiment"] = self.experiment + data[self.key]["run"] = self.run + data[self.key]["bucket"] = self.bucket + data[self.key]["date"] = self.date + data[self.key]["beam_energy"] = self.beam_energy + data[self.key]["qualitiy"] = self.qualitiy + data[self.key]["gridka_server"] = self.gridka_server + data[self.key]["global_tags"] = self.global_tags - with open(json_data, 'w') as file: + with open(json_data, "w") as file: # Dump the updated data json.dump(data, file) - + def send_from_kek_to_ceph(self, ceph_base_path: str, username: str): - ''' + """ function to send the files from KEK to Ceph at ETP using scp and the portal1 as entrypoint @param ceph_base_path: base path on Ceph to copy to. @param username: username at ETP to be used for scp - ''' - assert self.on_kek, "Executions does not seem to be on KEK. Kek paths are not accessible." + """ + assert ( + self.on_kek + ), "Executions does not seem to be on KEK. Kek paths are not accessible." assert self.kek_path is not None, "kek_paths is None. Please set the paths." for file in self.kek_file_list: - os.system(f"scp {file} {username}@portal1.etp.kit.edu:{ceph_base_path}/{Path(file).name}") + os.system( + f"scp {file} {username}@portal1.etp.kit.edu:{ceph_base_path}/{Path(file).name}" + ) self.ceph_path = ceph_base_path def send_from_kek_to_gridka(self, gridka_base_path: str): - ''' + """ function to send the files from KEK to Gridka using XRootD python bindings @param gridka_base_path: base path on Gridka to copy to. @param username: username at ETP to be used for scp - ''' - assert self.on_kek, "Executions does not seem to be on KEK. Kek paths are not accessible." + """ + assert ( + self.on_kek + ), "Executions does not seem to be on KEK. Kek paths are not accessible." assert self.kek_path is not None, "kek_paths is None. Please set the paths." - assert (os.path.exists(self.kek_path) and os.path.isdir(self.kek_path)), f"The path '{self.kek_path}' does not exist or is not a directory." + assert os.path.exists(self.kek_path) and os.path.isdir( + self.kek_path + ), f"The path '{self.kek_path}' does not exist or is not a directory." assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY." from XRootD import client + client = client.FileSystem(self.gridka_server) for file in self.kek_file_list: - status, _ = client.copy(f"file://{file}", f"{self.gridka_server}/{gridka_base_path}/{Path(file).name}") + + status, _ = client.copy( + f"file://{file}", + f"{self.gridka_server}/{gridka_base_path}/{Path(file).name}", + ) if not status.ok: if "File already exists" in status.message: print(f"Warning: File '{file}' already exists on Gridka. Skipping.") @@ -201,30 +272,36 @@ class BeambackgroundMetadata: raise RuntimeError(status.message) def send_from_ceph_to_gridka(self, gridka_base_path: str, username: str): - ''' + """ function to send the files from Ceph to Gridka using XRootD python bindings @param gridka_base_path: base path on Gridka to copy to. @param username: username at ETP to be used for scp - ''' - assert self.on_etp, "Executions does not seem to be on ETP. Ceph paths are not accessible." + """ + assert ( + self.on_etp + ), "Executions does not seem to be on ETP. Ceph paths are not accessible." assert self.ceph_path is not None, "ceph_paths is None. Please set the paths." - assert (os.path.exists(self.ceph_path) and os.path.isdir(self.ceph_path)), f"The path '{self.ceph_path}' does not exist or is not a directory." + # assert (os.path.exists(self.ceph_path) and os.path.isdir(self.ceph_path)), f"The path '{self.ceph_path}' does not exist or is not a directory." assert self.has_certificate, "No certificate found. Please set X509_USER_PROXY." from XRootD import client + client = client.FileSystem(self.gridka_server) for file in self.ceph_file_list: - status, _ = client.copy(f"file://{file}", f"{self.gridka_server}/{gridka_base_path}/{Path(file).name}") + status, _ = client.copy( + f"file://{file}", + f"{self.gridka_server}/{gridka_base_path}/{Path(file).name}", + ) if not status.ok: if "File already exists" in status.message: print(f"Warning: File '{file}' already exists on Gridka. Skipping.") else: raise RuntimeError(status.message) - def get_file_list(self, key = None) -> List[str]: - ''' + def get_file_list(self, key=None) -> List[str]: + """ function to get the file list for a specific storage. By default checks on which system it is and uses the appropriate paths. @param key: key for the storage. Possible values are 'kek', 'gridka' and 'ceph', default is 'gridka' - ''' + """ if key == "kek": return self.kek_file_list elif key == "gridka": @@ -242,30 +319,41 @@ class BeambackgroundMetadata: raise RuntimeError("No valid key given and no valid environment found.") else: raise RuntimeError(f"Key '{key}' not known. Use 'kek', 'gridka' or 'ceph'.") - + + def get_to_ceph_from_kek(self, host: str = "kekcc"): + """ + function to get the files from KEK to Ceph at ETP using rsync. + """ + assert ( + self.on_etp + ), "Executions does not seem to be on ETP. Ceph paths are not accessible." + assert self.kek_path is not None, "kek_paths is None. Please set the paths." + os.system(f"rsync -acz -r --mkpath --stats -h --progress {host}:{self.kek_path} {self.ceph_path}") + + def get_beam_background_by_key(key: str, json_data: str = json_default_path): - ''' + """ convenience function to search within a json file for a specific key and return the corresponding BackgroundData object @param key: key name under which it is stored in the json file @param json_data: path to the json file, default is json_default_path - ''' - with open(json_data, 'r') as file: + """ + with open(json_data, "r") as file: backgrounds_data = json.load(file) - + return BeambackgroundMetadata( key=key, - type=backgrounds_data[key]['type'], - kek_path=backgrounds_data[key]['kek_path'], - gridka_path=backgrounds_data[key]['gridka_path'], - ceph_path=backgrounds_data[key]['ceph_path'], - luminosity=backgrounds_data[key]['luminostiy'], - n_events=backgrounds_data[key]['n_events'], - experiment=backgrounds_data[key]['experiment'], - date=backgrounds_data[key]['date'], - beam_energy=backgrounds_data[key]['beam_energy'], - qualitiy=backgrounds_data[key]['qualitiy'], - global_tags=backgrounds_data[key]['global_tags'], - gridka_server=backgrounds_data[key]['gridka_server'], - run=backgrounds_data[key]['run'], - bucket=backgrounds_data[key]['bucket'], + type=backgrounds_data[key]["type"], + kek_path=backgrounds_data[key]["kek_path"], + gridka_path=backgrounds_data[key]["gridka_path"], + ceph_path=backgrounds_data[key]["ceph_path"], + luminosity=backgrounds_data[key]["luminostiy"], + n_events=backgrounds_data[key]["n_events"], + experiment=backgrounds_data[key]["experiment"], + date=backgrounds_data[key]["date"], + beam_energy=backgrounds_data[key]["beam_energy"], + qualitiy=backgrounds_data[key]["qualitiy"], + global_tags=backgrounds_data[key]["global_tags"], + gridka_server=backgrounds_data[key]["gridka_server"], + run=backgrounds_data[key]["run"], + bucket=backgrounds_data[key]["bucket"], ) diff --git a/beambackgrounds.json b/beambackgrounds.json index 4f00f375b097161945a38a5c97ee2537010ee800..a69b9c7672d65ef25b667694ca4d3c4f59a574ee 100644 --- a/beambackgrounds.json +++ b/beambackgrounds.json @@ -6398,7 +6398,7 @@ "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket28/sub00/beambg_000008_prod00023164_task2129000008.root", "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket28/sub00/beambg_000001_prod00023164_task2129000001.root" ], - "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/22_30/", + "gridka_path": null, "ceph_path": null, "luminostiy": 0.260951, "n_events": 104189, @@ -10093,7 +10093,7 @@ "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket31/sub00/beambg_000001_prod00024901_task23984000001.root", "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket31/sub00/beambg_000019_prod00024901_task23984000019.root" ], - "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/24_985/", + "gridka_path": null, "ceph_path": null, "luminostiy": 0.62329906, "n_events": 490664, @@ -16063,7 +16063,7 @@ "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket33/sub00/beambg_000020_prod00025346_task232175000020.root", "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket33/sub00/beambg_000013_prod00025346_task232175000013.root" ], - "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/24_2176/", + "gridka_path": null, "ceph_path": null, "luminostiy": 0.31112209, "n_events": 122875, @@ -31715,7 +31715,7 @@ "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket29/sub00/beambg_000013_prod00023342_task21467000013.root", "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket29/sub00/beambg_000012_prod00023342_task21467000012.root" ], - "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/22_468/", + "gridka_path": null, "ceph_path": null, "luminostiy": 0.27456198, "n_events": 108337, @@ -48806,7 +48806,7 @@ "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket35/sub00/beambg_000017_prod00026309_task25897000017.root", "/group/belle2/dataprod/MC/ecl_leakageCorrections/BGOverlay/bucket35/sub00/beambg_000004_prod00026309_task25897000004.root" ], - "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/26_898/", + "gridka_path": "/pnfs/gridka.de/belle/disk-only/LOCAL/group/beam_background/run_dependent/226_898/", "ceph_path": "/ceph/ihaide/BGOverlay/run_dependent/26_898/", "luminostiy": 0.39538333, "n_events": 153402, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..0c6ce7bc4901fa980bfd689543c45742b5c3de45 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "bb2etp" +version = "0.1.0" +description = "A description of your module" +authors = [ + { name = "Jonas Eppelt", email = "jonas.eppelt@kit.edu" } +] + diff --git a/setup.py b/setup.py index 1a8b02f345448cf632cf47005cc65774849e3c30..641c46a3a6933789a3b01519cccfa3a9784ae5d7 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,6 @@ -from setuptools import setup, find_packages -setup( - name='bb2etp', - version='0.1.0', - author='Jonas Eppelt', - author_email='jonas.eppelt@kit.edu', - description='A description of your module', - packages=find_packages(), - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], -) +from setuptools import setup + +setup() + + diff --git a/zipforship.sh b/zipforship.sh new file mode 100644 index 0000000000000000000000000000000000000000..2a7adf0127173a98d8760ece3df13d2b3f0e84b2 --- /dev/null +++ b/zipforship.sh @@ -0,0 +1,5 @@ +cd ../ +tar -czf bb2etp.tar BB2ETP +mv bb2etp.tar BB2ETP/bb2etp.tar +cd BB2ETP +