"""Fetch cached build results from a remote OCI cache into the local cache.

For every spec in the given specfiles, the expected build-cache artifact
paths are computed (via ``specfile_storage_path_build.py`` run under
``spack-python``).  Artifacts already present in the remote OCI registry
are pulled into the local cache; packages whose essential artifacts
(``.spack`` tarball or ``.spec.json``) could not be fetched are recorded
in ``path_missing``.
"""

import argparse
import os
import pathlib
import subprocess


def harbor_default(suffix):
    """Return "<HARBOR_HOST>/<HARBOR_PROJECT>/<suffix>" if both environment
    variables are set, otherwise an empty string."""
    host = os.environ.get("HARBOR_HOST")
    project = os.environ.get("HARBOR_PROJECT")
    if host and project:
        return f"{host}/{project}/{suffix}"
    return ""


def build_parser():
    """Build the command line interface (options unchanged from before)."""
    parser = argparse.ArgumentParser(
        prog='fetch_cached_buildresults.py',
        # fixed copy-pasted description ("source files" -> build results)
        description='Downloading missing build results to a spack cache.',
        epilog='...')
    parser.add_argument(
        "path_missing", type=pathlib.Path,
        help="Location of the output file that will list the packages not yet in the build cache.")
    parser.add_argument(
        "specfiles", nargs="+",
        help="Location of the file containing the specs to be available.")
    default_remote_cache = harbor_default("build_cache")
    parser.add_argument(
        "--remote-cache", type=str,
        required=not default_remote_cache,
        default=default_remote_cache,
        help="Path or URL to remote cache (target).")
    parser.add_argument(
        "--remote-cache-type", type=str, choices=["oci"],
        default="oci",
        help="Type of the remote cache.")
    # NOTE(review): credentials are plain strings, not filesystem paths; the
    # original wrapped them in pathlib.Path, which can alter the value
    # (e.g. collapse "//" or strip trailing slashes).
    parser.add_argument(
        "--remote-cache-username", type=str,
        default=os.environ.get("HARBOR_USERNAME", ""),
        help="Username for remote cache (if applicable)")
    parser.add_argument(
        "--remote-cache-password", type=str,
        default=os.environ.get("HARBOR_PASSWORD"),
        help="Password for remote cache (if applicable)")
    parser.add_argument(
        "--local-cache", type=str,
        default=os.environ.get(
            "YASHCHIKI_CACHE_BUILD",
            os.path.expanduser("~/.yashchiki/cache/")),
        help="Path to local spack cache folder (build results).")
    parser.add_argument(
        "--yashchiki-home", type=str, required=True,
        help="Path to yashchiki home for calling helper tools.")
    parser.add_argument(
        "--include-installed",
        action='store_true', default=False,
        help="Include already installed specs.")
    return parser


def oras_credentials(args):
    """Return the common --username/--password oras arguments ([] if unset)."""
    if args.remote_cache_username and args.remote_cache_password:
        return ["--username", args.remote_cache_username,
                "--password", args.remote_cache_password]
    return []


def list_remote_tags(args):
    """List all tags of the remote cache repository ([] on failure)."""
    cmd = ["oras", "repo", "tags", *oras_credentials(args), args.remote_cache]
    try:
        return subprocess.check_output(cmd).decode("utf-8").split()
    except subprocess.CalledProcessError:
        print(f"Listing repo tags of \"{args.remote_cache}\" failed.")
        return []


def compute_packages(args, specfile):
    """Map dag-hash -> expected build-cache paths for one specfile.

    Runs the helper under spack-python as a list-form command (the original
    used shell=True with interpolated paths, a quoting/injection hazard).
    """
    packages = {}
    cmd = ["spack-python",
           f"{args.yashchiki_home}/specfile_storage_path_build.py",
           str(specfile)]
    if args.include_installed:
        cmd.append("--include-installed")
    try:
        # FIXME: import and call function, but this would need *this to be
        # run in spack-python already
        lines = subprocess.check_output(cmd).decode("utf-8").split("\n")
    except subprocess.CalledProcessError as e:
        print("Computing fetch buildresult paths failed:", str(e), e.output)
        return packages
    for line in lines:
        if not line:
            continue
        elems = line.split()
        packages[elems[0]] = elems[1:]
    return packages


def main():
    args = build_parser().parse_args()

    local_cache = pathlib.Path(args.local_cache)
    if not local_cache.exists():
        print("Creating local build cache directory")
        # exist_ok=True avoids a race between the check above and mkdir
        local_cache.mkdir(parents=True, exist_ok=True)

    cached_paths = list_remote_tags(args)
    missing_packages = []
    available_packages = []

    for specfile in args.specfiles:
        for package_dag_hash, fetch_paths in compute_packages(args, specfile).items():
            missing_paths = []
            for fetch_path in fetch_paths:
                basename = os.path.basename(fetch_path)
                if basename not in cached_paths:
                    missing_paths.append(fetch_path)
                    continue
                cmd = ["oras", "pull", *oras_credentials(args),
                       f"{args.remote_cache}:{basename}"]
                try:
                    subprocess.check_output(
                        cmd, stderr=subprocess.STDOUT, cwd=local_cache)
                except subprocess.CalledProcessError:
                    print(f"Pulling of \"{basename}\" from \"{args.remote_cache}\" failed.")
                    missing_paths.append(fetch_path)
            # a package counts as missing only if an essential artifact
            # (binary tarball or spec metadata) could not be fetched
            package_missing = any(
                p.endswith(".spack") or p.endswith(".spec.json")
                for p in missing_paths)
            line = f"{package_dag_hash} " + " ".join(missing_paths)
            if package_missing:
                missing_packages.append(line)
            else:
                available_packages.append(line)

    print(len(missing_packages), "missing packages in remote buildresults cache.")
    print(len(available_packages), "available packages in remote buildresults cache.")

    if missing_packages:
        with open(args.path_missing, "w") as fd:
            fd.write("\n".join(missing_packages))


if __name__ == "__main__":
    main()
"""Fetch cached source archives from a remote OCI cache into the local cache.

For every spec in the given specfiles, the expected source-cache storage
paths are computed (via ``specfile_storage_path_source.py`` run under
``spack-python``).  Files already present in the remote OCI registry are
pulled into the local cache; paths that are absent (or failed to pull)
are recorded in ``path_missing``.
"""

import argparse
import os
import pathlib
import subprocess


def harbor_default(suffix):
    """Return "<HARBOR_HOST>/<HARBOR_PROJECT>/<suffix>" if both environment
    variables are set, otherwise an empty string."""
    host = os.environ.get("HARBOR_HOST")
    project = os.environ.get("HARBOR_PROJECT")
    if host and project:
        return f"{host}/{project}/{suffix}"
    return ""


def build_parser():
    """Build the command line interface (options unchanged from before)."""
    parser = argparse.ArgumentParser(
        prog='fetch_cached_sources.py',
        description='Downloading missing source files to a spack cache.',
        epilog='...')
    parser.add_argument(
        "path_missing", type=pathlib.Path,
        help="Location of the output file that will list the packages not yet in the source cache.")
    parser.add_argument(
        "specfiles", nargs="+",
        help="Location of the file containing the specs to be available.")
    default_remote_cache = harbor_default("source_cache")
    parser.add_argument(
        "--remote-cache", type=str,
        required=not default_remote_cache,
        default=default_remote_cache,
        help="Path or URL to remote cache (target).")
    parser.add_argument(
        "--remote-cache-type", type=str, choices=["oci"],
        default="oci",
        help="Type of the remote cache.")
    # NOTE(review): credentials are plain strings, not filesystem paths; the
    # original wrapped them in pathlib.Path, which can alter the value.
    parser.add_argument(
        "--remote-cache-username", type=str,
        default=os.environ.get("HARBOR_USERNAME", ""),
        help="Username for remote cache (if applicable)")
    parser.add_argument(
        "--remote-cache-password", type=str,
        default=os.environ.get("HARBOR_PASSWORD"),
        help="Password for remote cache (if applicable)")
    parser.add_argument(
        "--local-cache", type=str,
        default=os.environ.get(
            "YASHCHIKI_CACHE_SOURCE",
            os.path.expanduser("~/.yashchiki/cache/")),
        help="Path to local spack cache folder (source).")
    parser.add_argument(
        "--yashchiki-home", type=str, required=True,
        help="Path to yashchiki home for calling helper tools.")
    parser.add_argument(
        "--include-installed",
        action='store_true', default=False,
        help="Include already installed specs.")
    return parser


def oras_credentials(args):
    """Return the common --username/--password oras arguments ([] if unset)."""
    if args.remote_cache_username and args.remote_cache_password:
        return ["--username", args.remote_cache_username,
                "--password", args.remote_cache_password]
    return []


def list_remote_tags(args):
    """List all tags of the remote cache repository ([] on failure)."""
    cmd = ["oras", "repo", "tags", *oras_credentials(args), args.remote_cache]
    try:
        return subprocess.check_output(cmd).decode("utf-8").split()
    except subprocess.CalledProcessError:
        print(f"Listing repo tags of \"{args.remote_cache}\" failed.")
        return []


def compute_fetch_paths(args, specfile):
    """Return the source-cache storage paths for one specfile ([] on failure).

    Runs the helper under spack-python as a list-form command (the original
    used shell=True with interpolated paths, a quoting/injection hazard).
    """
    cmd = ["spack-python",
           f"{args.yashchiki_home}/specfile_storage_path_source.py",
           str(specfile)]
    if args.include_installed:
        cmd.append("--include-installed")
    try:
        # FIXME: import and call function, but this would need *this to be
        # run in spack-python already
        return subprocess.check_output(cmd).decode("utf-8").split()
    except subprocess.CalledProcessError:
        print(f"Computing fetch storage paths failed for {specfile}.")
        return []


def main():
    args = build_parser().parse_args()

    local_cache = pathlib.Path(args.local_cache)
    if not local_cache.exists():
        print("Creating local cache directory")
        # exist_ok=True avoids a race between the check above and mkdir
        local_cache.mkdir(parents=True, exist_ok=True)

    cached_paths = list_remote_tags(args)
    missing_paths = []
    available_paths = []

    for specfile in args.specfiles:
        for fetch_path in compute_fetch_paths(args, specfile):
            basename = os.path.basename(fetch_path)
            if basename not in cached_paths:
                missing_paths.append(fetch_path)
                continue
            cmd = ["oras", "pull", *oras_credentials(args),
                   f"{args.remote_cache}:{basename}"]
            try:
                subprocess.check_output(
                    cmd, stderr=subprocess.STDOUT, cwd=local_cache)
            except subprocess.CalledProcessError:
                print(f"Pulling of \"{basename}\" from \"{args.remote_cache}\" failed.")
                missing_paths.append(fetch_path)
            else:
                # BUG FIX: the original appended to available_paths even
                # when the pull failed (the append ran after the except),
                # counting a file as both missing and available.
                available_paths.append(fetch_path)

    print(len(missing_paths), "missing files in remote source cache.")
    print(len(available_paths), "available files in remote source cache.")

    if missing_paths:
        with open(args.path_missing, "w") as fd:
            fd.write("\n".join(missing_paths))


if __name__ == "__main__":
    main()
"""Print the DAG hashes of all specs contained in a given specfile.

Must be executed under ``spack-python`` so that the ``spack`` modules are
importable.  Already-installed specs are skipped unless
``--include-installed`` is given.
"""
import argparse
from collections.abc import Iterable
import pathlib
import ruamel.yaml as yaml
import spack
import spack.binary_distribution as bindist

parser = argparse.ArgumentParser(
    prog='specfile_dag_hash.py',
    description='Extracting DAG hashes from a given specfile',
    epilog='...')
parser.add_argument(
    "path_specfile", type=pathlib.Path,
    help="Location of the specfile to parse")
parser.add_argument(
    "--include-installed",
    action='store_true', default=False,
    help="Include already installed specs.")
args = parser.parse_args()

# a specfile may contain several YAML documents, each describing one spec
with open(args.path_specfile, "r") as fd:
    documents = list(yaml.safe_load_all(fd.read()))

dag_hashes = set()
for document in documents:
    root = spack.spec.Spec.from_dict(document)
    roots = root if isinstance(root, Iterable) else [root]

    # walk the whole dependency DAG, deduplicated by dag hash
    for node in spack.traverse.traverse_nodes(roots, key=spack.traverse.by_dag_hash):
        if node.installed and not args.include_installed:
            continue
        dag_hashes.add(node.dag_hash())

for dag_hash in dag_hashes:
    print(dag_hash)
"""Print, for every spec in a specfile, its DAG hash followed by the
build-cache storage paths of its artifacts (one spec per line).

Must be executed under ``spack-python``.  The output format is
``<dag-hash> <path> <path> ...`` and is consumed by
``fetch_cached_buildresults.py``, which splits each line on whitespace.
"""
import argparse
from collections.abc import Iterable
import pathlib
import ruamel.yaml as yaml
import spack
import spack.binary_distribution as bindist

parser = argparse.ArgumentParser(
    prog='specfile_storage_path_build.py',
    description='Extracting storage paths to the build cache from a given specfile',
    epilog='...')
parser.add_argument(
    "path_specfile", type=pathlib.Path,
    help="Location of the specfile to parse")
parser.add_argument(
    "--include-installed",
    action='store_true', default=False,
    help="Include already installed specs.")
args = parser.parse_args()

with open(args.path_specfile, "r") as fd:
    file_content = fd.read()
    data = list(yaml.safe_load_all(file_content))

to_be_fetched = set()
for rspec in data:
    s = spack.spec.Spec.from_dict(rspec)
    if not isinstance(s, Iterable):
        s = [s]

    maybe_to_be_fetched = spack.traverse.traverse_nodes(s, key=spack.traverse.by_dag_hash)

    for spec in maybe_to_be_fetched:
        if (not args.include_installed) and spec.installed:
            continue
        build_cache_paths = [
            bindist.tarball_path_name(spec, ".spack"),
            bindist.tarball_name(spec, ".spec.json.sig"),
            bindist.tarball_name(spec, ".spec.json"),
            bindist.tarball_name(spec, ".spec.yaml"),
        ]
        # BUG FIX: the original concatenated the dag hash directly onto the
        # first path ("<hash><path> ..."), so consumers doing line.split()
        # got a fused first token; join hash and paths with spaces.
        to_be_fetched.add(" ".join([spec.dag_hash()] + build_cache_paths))

for elem in to_be_fetched:
    print(elem)
"""Print the source-cache storage paths for all specs in a specfile.

Must be executed under ``spack-python``.  Covers the main source archive
of each package, the archives of its additional resources, and any URL
patches; non-cachable fetchers (e.g. branch-only versions, BundlePackages)
are skipped.
"""
import argparse
from collections.abc import Iterable
import llnl.util.filesystem as fsys
import os
import pathlib
import ruamel.yaml as yaml
import spack
import sys

parser = argparse.ArgumentParser(
    prog='specfile_storage_path_source.py',
    description='Extracting storage paths to the source cache from a given specfile',
    epilog='...')
parser.add_argument(
    "path_specfile", type=pathlib.Path,
    help="Location of the specfile to parse")
parser.add_argument(
    "--include-installed",
    action='store_true', default=False,
    help="Include already installed specs.")
args = parser.parse_args()

# a specfile may contain several YAML documents, each describing one spec
with open(args.path_specfile, "r") as fd:
    documents = list(yaml.safe_load_all(fd.read()))

storage_paths = set()
for document in documents:
    root = spack.spec.Spec.from_dict(document)
    roots = root if isinstance(root, Iterable) else [root]

    for node in spack.traverse.traverse_nodes(roots, key=spack.traverse.by_dag_hash):
        if node.installed and not args.include_installed:
            continue

        pkg = node.package

        # Some packages are not cachable (e.g. branch-name-only versions, or BundlePackages)
        if not pkg.fetcher.cachable:
            continue

        # TODO: pkg.fetcher.mirror_id() might be almost sufficient…)
        pretty_name = pkg.spec.format_path("{name}-{version}")
        cosmetic_path = os.path.join(pkg.name, pretty_name)
        storage_paths.add(str(
            spack.mirror.mirror_archive_paths(pkg.fetcher, cosmetic_path).storage_path))

        # archives of additional resources bundled with the package
        for resource in pkg._get_needed_resources():
            resource_name = fsys.polite_filename(f"{resource.name}-{pkg.version}")
            storage_paths.add(str(
                spack.mirror.mirror_archive_paths(resource.fetcher, resource_name).storage_path))

        # URL patches are fetched (and thus cached) as well
        for patch in node.patches:
            if isinstance(patch, spack.patch.UrlPatch):
                storage_paths.add(str(
                    spack.mirror.mirror_archive_paths(patch.stage.fetcher, patch.stage.name).storage_path))

for storage_path in storage_paths:
    print(storage_path)
"""Upload previously missing build results from the local cache to a remote
OCI cache.

Reads the file written by ``fetch_cached_buildresults.py`` (one line per
package: ``<dag-hash> <path> <path> ...``) and pushes every artifact that
exists in the local cache to the remote registry.
"""

import argparse
import glob
import os
import pathlib
import subprocess
import sys


def harbor_default(suffix):
    """Return "<HARBOR_HOST>/<HARBOR_PROJECT>/<suffix>" if both environment
    variables are set, otherwise an empty string."""
    host = os.environ.get("HARBOR_HOST")
    project = os.environ.get("HARBOR_PROJECT")
    if host and project:
        return f"{host}/{project}/{suffix}"
    return ""


def build_parser():
    """Build the command line interface (options unchanged from before)."""
    parser = argparse.ArgumentParser(
        prog='update_cached_buildresults.py',
        description='Uploading previously missing build results to a cache.',
        epilog='...')
    parser.add_argument(
        "path_missing", type=pathlib.Path,
        help="Location of the file that lists the hashes and packages not yet in the build cache.")
    default_remote_cache = harbor_default("build_cache")
    parser.add_argument(
        "--remote-cache", type=str,
        required=not default_remote_cache,
        default=default_remote_cache,
        help="Path or URL to remote cache (target).")
    parser.add_argument(
        "--remote-cache-type", type=str, choices=["oci"],
        default="oci",
        help="Type of the remote cache.")
    # NOTE(review): credentials are plain strings, not filesystem paths; the
    # original wrapped them in pathlib.Path, which can alter the value.
    parser.add_argument(
        "--remote-cache-username", type=str,
        required="HARBOR_USERNAME" not in os.environ,
        default=os.environ.get("HARBOR_USERNAME", ""),
        help="Username for remote cache (if applicable)")
    parser.add_argument(
        "--remote-cache-password", type=str,
        required="HARBOR_PASSWORD" not in os.environ,
        default=os.environ.get("HARBOR_PASSWORD"),
        help="Password for remote cache (if applicable)")
    parser.add_argument(
        "--local-cache", type=str,
        default=os.environ.get(
            "YASHCHIKI_CACHE_BUILD",
            os.path.expanduser("~/.yashchiki/cache/")),
        help="Path to local spack cache folder (build results).")
    return parser


def parse_missing_lines(lines):
    """Parse "<dag-hash> <path> ..." lines into {hash: [paths]}.

    Skips blank lines — the original crashed with IndexError on a trailing
    empty line.
    """
    packages = {}
    for line in lines:
        elems = line.split()
        if not elems:
            continue
        packages[elems[0]] = elems[1:]
    return packages


def main():
    args = build_parser().parse_args()

    if not os.path.exists(args.path_missing):
        print("File w/ missing cached build information is not available: {}".format(args.path_missing))
        sys.exit(0)

    with open(args.path_missing, "r") as fd:
        packages = parse_missing_lines(fd.readlines())

    local_cache = pathlib.Path(args.local_cache)
    for package_dag_hash, paths in packages.items():
        for path in paths:
            basename = os.path.basename(path)
            full_path = local_cache / path

            is_essential = (str(full_path).endswith(".spack")
                            or str(full_path).endswith(".spec.json"))
            if not full_path.exists():
                if is_essential:
                    print(f"Missing local cache entry for \"{full_path}\"")
                # we don't care about other file endings for now
                continue

            cmd = ("oras", "push",
                   "--username", args.remote_cache_username,
                   "--password", args.remote_cache_password,
                   f"--annotation=path={path}",
                   f"{args.remote_cache}:{basename}",
                   f"{path}")
            try:
                subprocess.check_output(cmd, cwd=args.local_cache)
            except subprocess.CalledProcessError:
                print(f"Uploading of \"{path}\" to \"{args.remote_cache}:{basename}\" failed.")


if __name__ == "__main__":
    main()
"""Upload previously missing source files from the local cache to a remote
OCI cache.

Reads the file written by ``fetch_cached_sources.py`` (one storage path
per line) and pushes each file to the remote registry, tagged by its
basename.
"""

import argparse
import os
import pathlib
import subprocess
import sys


def harbor_default(suffix):
    """Return "<HARBOR_HOST>/<HARBOR_PROJECT>/<suffix>" if both environment
    variables are set, otherwise an empty string."""
    host = os.environ.get("HARBOR_HOST")
    project = os.environ.get("HARBOR_PROJECT")
    if host and project:
        return f"{host}/{project}/{suffix}"
    return ""


def build_parser():
    """Build the command line interface (options unchanged from before)."""
    parser = argparse.ArgumentParser(
        prog='update_cached_sources.py',
        description='Uploading previously missing source files to a cache.',
        epilog='...')
    parser.add_argument(
        "path_missing", type=pathlib.Path,
        help="Location of the file that lists the hashes and packages not yet in the source cache.")
    default_remote_cache = harbor_default("source_cache")
    parser.add_argument(
        "--remote-cache", type=str,
        required=not default_remote_cache,
        default=default_remote_cache,
        help="Path or URL to remote cache (target).")
    parser.add_argument(
        "--remote-cache-type", type=str, choices=["oci"],
        default="oci",
        help="Type of the remote cache.")
    # NOTE(review): credentials are plain strings, not filesystem paths; the
    # original wrapped them in pathlib.Path, which can alter the value.
    parser.add_argument(
        "--remote-cache-username", type=str,
        required="HARBOR_USERNAME" not in os.environ,
        default=os.environ.get("HARBOR_USERNAME", ""),
        help="Username for remote cache (if applicable)")
    parser.add_argument(
        "--remote-cache-password", type=str,
        required="HARBOR_PASSWORD" not in os.environ,
        default=os.environ.get("HARBOR_PASSWORD"),
        help="Password for remote cache (if applicable)")
    parser.add_argument(
        "--local-cache", type=str,
        default=os.environ.get(
            "YASHCHIKI_CACHE_SOURCE",
            os.path.expanduser("~/.yashchiki/cache/")),
        help="Path to local spack cache folder (source).")
    return parser


def upload_cmd(remote_cache, username, password, stripped_path):
    """Build the oras push command for a single source file (tagged by its
    basename, annotated with the full relative path)."""
    basename = os.path.basename(stripped_path)
    return ("oras", "push",
            "--username", username,
            "--password", password,
            f"--annotation=path={stripped_path}",
            f"{remote_cache}:{basename}",
            f"{stripped_path}")


def main():
    args = build_parser().parse_args()

    if not os.path.exists(args.path_missing):
        print("File w/ missing cached source information is not available: {}".format(args.path_missing))
        sys.exit(0)

    with open(args.path_missing, "r") as fd:
        missing_file_paths = fd.readlines()

    for path in missing_file_paths:
        stripped_path = path.rstrip()
        if not stripped_path:
            # skip blank lines — the original would push to an empty tag
            continue
        basename = os.path.basename(stripped_path)
        cmd = upload_cmd(args.remote_cache, args.remote_cache_username,
                         args.remote_cache_password, stripped_path)
        try:
            subprocess.check_output(cmd, cwd=args.local_cache)
        except subprocess.CalledProcessError:
            print(f"Uploading of \"{stripped_path}\" to \"{args.remote_cache}:{basename}\" failed.")


if __name__ == "__main__":
    main()