From ab43ca9a9fd2207a2b33b32d811e75d64f99542c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eric=20M=C3=BCller?= <mueller@kip.uni-heidelberg.de>
Date: Mon, 9 Dec 2024 14:40:11 +0100
Subject: [PATCH] draft: split out source cache fetcher and updater

---
 lib/yashchiki/fetch_cached_buildresults.py    | 121 ++++++++++++++++++
 lib/yashchiki/fetch_cached_sources.py         | 107 ++++++++++++++++
 lib/yashchiki/specfile_dag_hash.py            |  16 +--
 lib/yashchiki/specfile_storage_path.py        |   4 +-
 lib/yashchiki/specfile_storage_path_build.py  |  40 ++++++
 lib/yashchiki/update_cached_buildresults.py   |  83 ++++++++++++
 lib/yashchiki/update_cached_sources.py        |  69 ++++++++++
 share/yashchiki/utils/fetch_cached_sources.sh |  68 ++++++++++
 share/yashchiki/utils/fetch_sources.sh        |  47 -------
 .../yashchiki/utils/update_cached_sources.sh  |  39 ++++++
 10 files changed, 534 insertions(+), 60 deletions(-)
 create mode 100644 lib/yashchiki/fetch_cached_buildresults.py
 create mode 100644 lib/yashchiki/fetch_cached_sources.py
 create mode 100644 lib/yashchiki/specfile_storage_path_build.py
 create mode 100644 lib/yashchiki/update_cached_buildresults.py
 create mode 100644 lib/yashchiki/update_cached_sources.py
 create mode 100755 share/yashchiki/utils/fetch_cached_sources.sh
 delete mode 100755 share/yashchiki/utils/fetch_sources.sh
 create mode 100755 share/yashchiki/utils/update_cached_sources.sh

diff --git a/lib/yashchiki/fetch_cached_buildresults.py b/lib/yashchiki/fetch_cached_buildresults.py
new file mode 100644
index 00000000..e94367cb
--- /dev/null
+++ b/lib/yashchiki/fetch_cached_buildresults.py
@@ -0,0 +1,121 @@
+import argparse
+import os
+import pathlib
+import subprocess
+
+parser = argparse.ArgumentParser(
+        prog='fetch_cached_buildresults.py',
+        description='Downloading missing build results to a spack cache.',
+        epilog='...')
+
+parser.add_argument(
+    "path_missing", type=pathlib.Path,
+    help="Location of the file listing the missing build results.")
+
+parser.add_argument(
+    "specfiles", nargs="+",
+    help="Specfiles describing the packages to be fetched.")
+
+parser.add_argument(
+    "--remote-cache", type=str, required=(not (("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ))),
+    default=(("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ)) and
+            ("{}/{}/build_cache".format(os.environ["HARBOR_HOST"], os.environ["HARBOR_PROJECT"])) or "",
+    help="Path or URL to remote cache (target).")
+
+parser.add_argument(
+    "--remote-cache-type", type=str, choices=["oci"],
+    default="oci",
+    help="Type of the remote cache.")
+
+parser.add_argument(
+    "--remote-cache-username", type=str,
+    default="HARBOR_USERNAME" in os.environ and
+        pathlib.Path(os.environ["HARBOR_USERNAME"]) or "",
+    help="Username for remote cache (if applicable)")
+
+parser.add_argument(
+    "--remote-cache-password", type=str,
+    default="HARBOR_PASSWORD" in os.environ and
+        pathlib.Path(os.environ["HARBOR_PASSWORD"]) or None,
+    help="Password for remote cache (if applicable)")
+
+parser.add_argument(
+    "--local-cache", type=str,
+    default="YASHCHIKI_CACHE_BUILD" in os.environ and
+        pathlib.Path(os.environ["YASHCHIKI_CACHE_BUILD"]) or
+        os.path.expanduser("~/.yashchiki/cache/"),
+    help="Path to local spack cache folder (build results).")
+
+args = parser.parse_args()
+
+local_cache = pathlib.Path(args.local_cache)
+if not os.path.exists(args.local_cache):
+    print("Creating local build cache directory")
+    local_cache.mkdir(parents=True, exist_ok=False)
+
+missing_packages = []
+available_packages = []
+cached_paths = []
+cmd = ["oras", "repo", "tags"]
+if args.remote_cache_username and args.remote_cache_password:
+    cmd.extend(["--username", args.remote_cache_username])
+    cmd.extend(["--password", args.remote_cache_password])
+cmd.append(args.remote_cache)
+try:
+    tags = subprocess.check_output(cmd)
+    tags = tags.decode("utf-8")
+    cached_paths = tags.split()
+except subprocess.CalledProcessError as e:
+    print(f"Listing repo tags of \"{args.remote_cache}\" failed.")
+
+for specfile in args.specfiles:
+    with open(specfile, "r") as fd:
+        fetch_paths = []
+        packages = {}
+        try:
+            my_spack_folder = os.environ["MY_SPACK_FOLDER"]
+            my_spack_python = os.environ["MY_SPACK_PYTHON"]
+            yashchiki_home = os.environ["YASHCHIKI_HOME"]
+            # FIXME: call python!
+            lines = subprocess.check_output(f"PATH={my_spack_folder}/bin:$PATH {my_spack_python} {yashchiki_home}/lib/yashchiki/specfile_storage_path_build.py {specfile}", shell=True)
+            lines = lines.decode("utf-8")
+            lines = lines.split("\n")
+            for line in lines:
+                if not line:
+                    continue
+                elems = line.split()
+                packages[elems[0]] = elems[1:]
+        except subprocess.CalledProcessError as e:
+            print(f"Computing fetch buildresult paths failed:", str(e), e.output)
+        for package_dag_hash, fetch_paths in packages.items():
+            missing_paths = []
+            for fetch_path in fetch_paths:
+                basename = os.path.basename(fetch_path)
+                if basename in cached_paths:
+                    cmd = ["oras", "pull"]
+                    if args.remote_cache_username and args.remote_cache_password:
+                        cmd.extend(["--username", args.remote_cache_username])
+                        cmd.extend(["--password", args.remote_cache_password])
+                    cmd.append(args.remote_cache + f":{basename}")
+                    try:
+                        subprocess.check_output(cmd, stderr=subprocess.STDOUT, cwd=local_cache)
+                    except subprocess.CalledProcessError as e:
+                        print(f"Pulling of \"{basename}\" from \"{args.remote_cache}\" failed.")
+                        missing_paths.append(fetch_path)
+                else:
+                    missing_paths.append(fetch_path)
+            package_missing = False
+            for missing_path in missing_paths:
+                if missing_path.endswith(".spack") or missing_path.endswith(".spec.json"):
+                    package_missing = True
+            if package_missing:
+                missing_packages.append(f"{package_dag_hash} " + " ".join(missing_paths))
+            else:
+                available_packages.append(f"{package_dag_hash} " + " ".join(missing_paths))
+
+print(len(missing_packages), "missing packages in remote buildresults cache.")
+print(len(available_packages), "available packages in remote buildresults cache.")
+
+if missing_packages:
+    with open(args.path_missing, "w") as fd:
+        fd.write("\n".join(missing_packages))
diff --git a/lib/yashchiki/fetch_cached_sources.py b/lib/yashchiki/fetch_cached_sources.py
new file mode 100644
index 00000000..8e7072a0
--- /dev/null
+++ b/lib/yashchiki/fetch_cached_sources.py
@@ -0,0 +1,107 @@
+import argparse
+import os
+import pathlib
+import subprocess
+
+parser = argparse.ArgumentParser(
+        prog='fetch_cached_sources.py',
+        description='Downloading missing source files to a spack cache.',
+        epilog='...')
+
+parser.add_argument(
+    "path_missing", type=pathlib.Path,
+    help="Location of the file listing the missing source files.")
+
+parser.add_argument(
+    "specfiles", nargs="+",
+    help="Specfiles describing the packages whose sources are to be fetched.")
+
+parser.add_argument(
+    "--remote-cache", type=str, required=(not (("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ))),
+    default=(("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ)) and
+            ("{}/{}/source_cache".format(os.environ["HARBOR_HOST"], os.environ["HARBOR_PROJECT"])) or "",
+    help="Path or URL to remote cache (target).")
+
+parser.add_argument(
+    "--remote-cache-type", type=str, choices=["oci"],
+    default="oci",
+    help="Type of the remote cache.")
+
+parser.add_argument(
+    "--remote-cache-username", type=str,
+    default="HARBOR_USERNAME" in os.environ and
+        pathlib.Path(os.environ["HARBOR_USERNAME"]) or "",
+    help="Username for remote cache (if applicable)")
+
+parser.add_argument(
+    "--remote-cache-password", type=str,
+    default="HARBOR_PASSWORD" in os.environ and
+        pathlib.Path(os.environ["HARBOR_PASSWORD"]) or None,
+    help="Password for remote cache (if applicable)")
+
+parser.add_argument(
+    "--local-cache", type=str,
+    default="YASHCHIKI_CACHE_SOURCE" in os.environ and
+        pathlib.Path(os.environ["YASHCHIKI_CACHE_SOURCE"]) or
+        os.path.expanduser("~/.yashchiki/cache/"),
+    help="Path to local spack cache folder (source).")
+
+args = parser.parse_args()
+
+local_cache = pathlib.Path(args.local_cache)
+if not os.path.exists(args.local_cache):
+    print("Creating local cache directory")
+    local_cache.mkdir(parents=True, exist_ok=False)
+
+missing_paths = []
+available_paths = []
+cached_paths = []
+cmd = ["oras", "repo", "tags"]
+if args.remote_cache_username and args.remote_cache_password:
+    cmd.extend(["--username", args.remote_cache_username])
+    cmd.extend(["--password", args.remote_cache_password])
+cmd.append(args.remote_cache)
+try:
+    tags = subprocess.check_output(cmd)
+    tags = tags.decode("utf-8")
+    cached_paths = tags.split()
+except subprocess.CalledProcessError as e:
+    print(f"Listing repo tags of \"{args.remote_cache}\" failed.")
+
+for specfile in args.specfiles:
+    with open(specfile, "r") as fd:
+        fetch_paths = []
+        try:
+            my_spack_folder = os.environ["MY_SPACK_FOLDER"]
+            my_spack_python = os.environ["MY_SPACK_PYTHON"]
+            yashchiki_home = os.environ["YASHCHIKI_HOME"]
+            # FIXME: call python!
+            paths = subprocess.check_output(f"PATH={my_spack_folder}/bin:$PATH {my_spack_python} {yashchiki_home}/lib/yashchiki/specfile_storage_path.py {specfile}", shell=True)
+            paths = paths.decode("utf-8")
+            fetch_paths = paths.split()
+        except subprocess.CalledProcessError as e:
+            print(f"Computing fetch storage paths failed for {specfile}.")
+            continue
+        for fetch_path in fetch_paths:
+            basename = os.path.basename(fetch_path)
+            if basename in cached_paths:
+                cmd = ["oras", "pull"]
+                if args.remote_cache_username and args.remote_cache_password:
+                    cmd.extend(["--username", args.remote_cache_username])
+                    cmd.extend(["--password", args.remote_cache_password])
+                cmd.append(args.remote_cache + f":{basename}")
+                try:
+                    subprocess.check_output(cmd, stderr=subprocess.STDOUT, cwd=local_cache)
+                except subprocess.CalledProcessError as e:
+                    print(f"Pulling of \"{basename}\" from \"{args.remote_cache}\" failed.")
+                    missing_paths.append(fetch_path)
+                available_paths.append(fetch_path)
+            else:
+                missing_paths.append(fetch_path)
+
+print(len(missing_paths), "missing files in remote source cache.")
+print(len(available_paths), "available files in remote source cache.")
+
+if missing_paths:
+    with open(args.path_missing, "w") as fd:
+        fd.write("\n".join(missing_paths))
diff --git a/lib/yashchiki/specfile_dag_hash.py b/lib/yashchiki/specfile_dag_hash.py
index 76a16826..a7707819 100644
--- a/lib/yashchiki/specfile_dag_hash.py
+++ b/lib/yashchiki/specfile_dag_hash.py
@@ -2,7 +2,7 @@ import argparse
 from collections.abc import Iterable
 import pathlib
 import ruamel.yaml as yaml
-from spack import spec
+import spack
 import spack.binary_distribution as bindist
 
 parser = argparse.ArgumentParser(
@@ -22,17 +22,13 @@ with open(args.path_specfile, "r") as fd:
 
 to_be_fetched = []
 for rspec in data:
-    s = spec.Spec.from_dict(rspec)
+    s = spack.spec.Spec.from_dict(rspec)
     if not isinstance(s, Iterable):
         s = [s]
 
     maybe_to_be_fetched = spack.traverse.traverse_nodes(s, key=spack.traverse.by_dag_hash)
-    
+
     for spec in maybe_to_be_fetched:
-        build_cache_paths = [
-            bindist.tarball_path_name(spec, ".spack"),
-            bindist.tarball_name(spec, ".spec.json.sig"),
-            bindist.tarball_name(spec, ".spec.json"),
-            bindist.tarball_name(spec, ".spec.yaml"),
-        ]
-        print("\n".join(build_cache_paths))
+        if spec.installed:
+            continue
+        print(spec.dag_hash())
diff --git a/lib/yashchiki/specfile_storage_path.py b/lib/yashchiki/specfile_storage_path.py
index 1d5b13dc..8c635068 100644
--- a/lib/yashchiki/specfile_storage_path.py
+++ b/lib/yashchiki/specfile_storage_path.py
@@ -3,7 +3,6 @@ from collections.abc import Iterable
 import pathlib
 import ruamel.yaml as yaml
 import spack
-from spack import spec
 
 parser = argparse.ArgumentParser(
         prog='specfile_storage_path.py',
@@ -22,7 +21,7 @@ with open(args.path_specfile, "r") as fd:
 
 to_be_fetched = []
 for rspec in data:
-    s = spec.Spec.from_dict(rspec)
+    s = spack.spec.Spec.from_dict(rspec)
     if not isinstance(s, Iterable):
         s = [s]
 
@@ -35,4 +34,3 @@ for rspec in data:
         pkg = ss.package
         to_be_fetched.append(pkg)
         print(spack.mirror.mirror_archive_paths(pkg.fetcher, 'whatever').storage_path)
-    print("")
diff --git a/lib/yashchiki/specfile_storage_path_build.py b/lib/yashchiki/specfile_storage_path_build.py
new file mode 100644
index 00000000..6af4a0bb
--- /dev/null
+++ b/lib/yashchiki/specfile_storage_path_build.py
@@ -0,0 +1,40 @@
+import argparse
+from collections.abc import Iterable
+import pathlib
+import ruamel.yaml as yaml
+import spack
+import spack.binary_distribution as bindist
+
+parser = argparse.ArgumentParser(
+        prog='specfile_storage_path_build.py',
+        description='Extracting build cache storage paths from a given specfile',
+        epilog='...')
+
+parser.add_argument(
+    "path_specfile", type=pathlib.Path,
+    help="Location of the specfile to parse")
+
+args = parser.parse_args()
+
+with open(args.path_specfile, "r") as fd:
+    file_content = fd.read()
+    data = list(yaml.safe_load_all(file_content))
+
+to_be_fetched = []
+for rspec in data:
+    s = spack.spec.Spec.from_dict(rspec)
+    if not isinstance(s, Iterable):
+        s = [s]
+
+    maybe_to_be_fetched = spack.traverse.traverse_nodes(s, key=spack.traverse.by_dag_hash)
+
+    for spec in maybe_to_be_fetched:
+        if spec.installed:
+            continue
+        build_cache_paths = [
+            bindist.tarball_path_name(spec, ".spack"),
+            bindist.tarball_name(spec, ".spec.json.sig"),
+            bindist.tarball_name(spec, ".spec.json"),
+            bindist.tarball_name(spec, ".spec.yaml"),
+        ]
+        print(spec.dag_hash(), " ".join(build_cache_paths))
diff --git a/lib/yashchiki/update_cached_buildresults.py b/lib/yashchiki/update_cached_buildresults.py
new file mode 100644
index 00000000..bb63d388
--- /dev/null
+++ b/lib/yashchiki/update_cached_buildresults.py
@@ -0,0 +1,83 @@
+import argparse
+import glob
+import os
+import pathlib
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(
+        prog='update_cached_buildresults.py',
+        description='Uploading previously missing build results to a cache.',
+        epilog='...')
+
+parser.add_argument(
+    "path_missing", type=pathlib.Path,
+    help="Location of the file listing the missing build results.")
+
+parser.add_argument(
+    "--remote-cache", type=str, required=(not (("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ))),
+    default=(("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ)) and
+            ("{}/{}/build_cache".format(os.environ["HARBOR_HOST"], os.environ["HARBOR_PROJECT"])) or "",
+    help="Path or URL to remote cache (target).")
+
+parser.add_argument(
+    "--remote-cache-type", type=str, choices=["oci"],
+    default="oci",
+    help="Type of the remote cache.")
+
+parser.add_argument(
+    "--remote-cache-username", type=str, required=(not "HARBOR_USERNAME" in os.environ),
+    default="HARBOR_USERNAME" in os.environ and
+        pathlib.Path(os.environ["HARBOR_USERNAME"]) or "",
+    help="Username for remote cache (if applicable)")
+
+parser.add_argument(
+    "--remote-cache-password", type=str, required=(not "HARBOR_PASSWORD" in os.environ),
+    default="HARBOR_PASSWORD" in os.environ and
+        pathlib.Path(os.environ["HARBOR_PASSWORD"]) or None,
+    help="Password for remote cache (if applicable)")
+
+parser.add_argument(
+    "--local-cache", type=str,
+    default="YASHCHIKI_CACHE_BUILD" in os.environ and
+        pathlib.Path(os.environ["YASHCHIKI_CACHE_BUILD"]) or
+        os.path.expanduser("~/.yashchiki/cache/"),
+    help="Path to local spack cache folder (build results).")
+
+args = parser.parse_args()
+
+if not os.path.exists(args.path_missing):
+    print("File w/ missing package information is not available")
+    sys.exit(0)
+
+packages = {}
+with open(args.path_missing, "r") as fd:
+    lines = fd.readlines()
+    for line in lines:
+        elems = line.split()
+        packages[elems[0]] = elems[1:]
+
+    for package_dag_hash, paths in packages.items():
+        basenames = [ os.path.basename(path) for path in paths]
+
+        for path, basename in zip(paths, basenames):
+            full_path = pathlib.Path(str(args.local_cache) + "/" + path)
+
+            if ((str(full_path).endswith(".spack") or str(full_path).endswith(".spec.json")) and not full_path.exists()):
+                print(f"Missing local cache entry for \"{full_path}\"")
+                continue
+
+            if not full_path.exists():
+                # we don't care about other file endings for now
+                continue
+
+            cmd = ("oras", "push",
+                    "--username", args.remote_cache_username,
+                    "--password", args.remote_cache_password,
+                    f"--annotation=path={path}",
+                    f"{args.remote_cache}:{basename}",
+                    f"{path}")
+            try:
+                subprocess.check_output(cmd, cwd=args.local_cache)
+            except subprocess.CalledProcessError as e:
+                print(f"Uploading of \"{path}\" to \"{args.remote_cache}:{basename}\" failed.")
diff --git a/lib/yashchiki/update_cached_sources.py b/lib/yashchiki/update_cached_sources.py
new file mode 100644
index 00000000..f8c80052
--- /dev/null
+++ b/lib/yashchiki/update_cached_sources.py
@@ -0,0 +1,69 @@
+import argparse
+import os
+import pathlib
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(
+        prog='update_cached_sources.py',
+        description='Uploading previously missing source files to a cache.',
+        epilog='...')
+
+parser.add_argument(
+    "path_missing", type=pathlib.Path,
+    help="Location of the file listing the missing source files.")
+
+parser.add_argument(
+    "--remote-cache", type=str, required=(not (("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ))),
+    default=(("HARBOR_HOST" in os.environ) and ("HARBOR_PROJECT" in os.environ)) and
+            ("{}/{}/source_cache".format(os.environ["HARBOR_HOST"], os.environ["HARBOR_PROJECT"])) or "",
+    help="Path or URL to remote cache (target).")
+
+parser.add_argument(
+    "--remote-cache-type", type=str, choices=["oci"],
+    default="oci",
+    help="Type of the remote cache.")
+
+parser.add_argument(
+    "--remote-cache-username", type=str, required=(not "HARBOR_USERNAME" in os.environ),
+    default="HARBOR_USERNAME" in os.environ and
+        pathlib.Path(os.environ["HARBOR_USERNAME"]) or "",
+    help="Username for remote cache (if applicable)")
+
+parser.add_argument(
+    "--remote-cache-password", type=str, required=(not "HARBOR_PASSWORD" in os.environ),
+    default="HARBOR_PASSWORD" in os.environ and
+        pathlib.Path(os.environ["HARBOR_PASSWORD"]) or None,
+    help="Password for remote cache (if applicable)")
+
+parser.add_argument(
+    "--local-cache", type=str,
+    default="YASHCHIKI_CACHE_SOURCE" in os.environ and
+        pathlib.Path(os.environ["YASHCHIKI_CACHE_SOURCE"]) or
+        os.path.expanduser("~/.yashchiki/cache/"),
+    help="Path to local spack cache folder (source).")
+
+args = parser.parse_args()
+
+if not os.path.exists(args.path_missing):
+    print("File w/ missing source path information is not available")
+    sys.exit(0)
+
+with open(args.path_missing, "r") as fd:
+    missing_file_paths = fd.readlines()
+
+    for path in missing_file_paths:
+        stripped_path = path.rstrip()
+        basename = os.path.basename(stripped_path)
+        full_path = pathlib.Path(str(args.local_cache) + "/" + stripped_path)
+
+        cmd = ("oras", "push",
+                "--username", args.remote_cache_username,
+                "--password", args.remote_cache_password,
+                f"--annotation=path={stripped_path}",
+                f"{args.remote_cache}:{basename}",
+                f"{stripped_path}")
+        try:
+            subprocess.check_output(cmd, cwd=args.local_cache)
+        except subprocess.CalledProcessError as e:
+            print(f"Uploading of \"{stripped_path}\" to \"{args.remote_cache}:{basename}\" failed.")
diff --git a/share/yashchiki/utils/fetch_cached_sources.sh b/share/yashchiki/utils/fetch_cached_sources.sh
new file mode 100755
index 00000000..3dafe90a
--- /dev/null
+++ b/share/yashchiki/utils/fetch_cached_sources.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+set -euo pipefail
+shopt -s inherit_errexit
+
+#set -x
+head "./env_specfile.yaml"
+
+fetch_specfiles=()
+fetch_specfiles_cache=()
+
+# spack fetch doesn't like proper multi-doc separators (---)
+# FIXME: add --quiet; replace -z by long version --elide-empty-files
+csplit -z --prefix=env_specfile_split --suffix-format=%02d.yaml ./env_specfile.yaml /---/ '{*}'
+ls #FIXME
+for split_specfile in ./env_specfile_split*.yaml; do
+    fetch_specfiles+=( ${split_specfile} )
+done
+# multi-doc style for "normal" yaml parsing
+fetch_specfiles_cache+=( "./env_specfile.yaml" )
+
+echo "Path to source cache: ${YASHCHIKI_CACHE_SOURCE}"
+mkdir -p ${YASHCHIKI_CACHE_SOURCE} || true
+
+missing_paths=()
+if [ -n "${CACHE_SOURCE_TYPE:-}" ]; then
+    if [ ${CACHE_SOURCE_TYPE} != "oci" ]; then
+        echo "Unknown cache type"
+        exit 1
+    fi
+    cached_paths=$(oras repo tags ${HARBOR_HOST}/${HARBOR_PROJECT}/source_cache 2>&1)
+    if [ $? -ne 0 ]; then
+        echo "ERROR: OCI repo query failed."
+    fi
+    echo "cached_paths: \"${cached_paths}\""
+    for fetch_specfile in "${fetch_specfiles_cache[@]}"; do
+        raw_paths=$(PATH=${MY_SPACK_FOLDER}/bin:$PATH ${MY_SPACK_PYTHON} ${YASHCHIKI_HOME}/lib/yashchiki/specfile_storage_path.py ${fetch_specfile} 2>/dev/null)
+        echo "raw_paths: \"$raw_paths\""
+        fetch_paths=(${raw_paths})
+        echo "fetch_paths: \"${fetch_paths[@]}\""
+        pushd ${YASHCHIKI_CACHE_SOURCE} >/dev/null
+        for fetch_path in "${fetch_paths[@]}"; do
+            if [ -z "${fetch_path}" ]; then
+                echo "ERROR: fetch_path is empty: \"${fetch_path}\""
+                continue
+            else
+                echo "fetch_path is \"${fetch_path}\""
+            fi
+            if [[ " ${cached_paths[*]} " =~ " $(basename ${fetch_path}) " ]]; then
+                echo "found \"$(basename ${fetch_path})\" in cached_paths"
+                oras pull ${HARBOR_HOST}/${HARBOR_PROJECT}/source_cache:$(basename ${fetch_path}) >/dev/null 2>&1 && ret=$? || ret=$?
+                if [ ${ret} -ne 0 ]; then
+                    echo "Fetching failed."
+                    missing_paths+=( "${fetch_path}" )
+                else
+                    echo "Fetched: \"${fetch_path}\""
+                fi
+            else
+                echo "Did not find \"${fetch_path}\" in cached_paths"
+                missing_paths+=( "${fetch_path}" )
+            fi
+        done
+        popd >/dev/null
+    done
+    echo "Missing source cache entries: ${missing_paths[@]}"
+fi
+
+printf "%s\n" "${missing_paths[@]}" >missing_paths.dat
diff --git a/share/yashchiki/utils/fetch_sources.sh b/share/yashchiki/utils/fetch_sources.sh
deleted file mode 100755
index 82cc45a6..00000000
--- a/share/yashchiki/utils/fetch_sources.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-shopt -s inherit_errexit
-
-# TODO:
-# expects active environment
-
-(
-    specfile="./env_specfile.yaml";
-    # TODO: expects loaded env
-    spack spec -y > "${specfile}"
-) && echo "done." || (echo "FAILED."; exit 1)
-
-fetch_specfiles=()
-fetch_specfiles_cache=()
-
-# spack fetch doesn't like proper multi-doc separators (---)
-csplit -z --prefix=env_specfile_split --suffix-format=%02d.yaml ./env_specfile.yaml /---/ '{*}'
-for split_specfile in ./env_specfile_split*.yaml; do
-    fetch_specfiles+=( ${split_specfile} )
-done
-# multi-doc style for "normal" yaml parsing
-fetch_specfiles_cache+=( "./env_specfile.yaml" )
-
-
-missing_paths=()
-if [ -n "${CACHE_SOURCE_TYPE:-}" ]; then
-    if [ ${CACHE_SOURCE_TYPE} != "oci" ]; then
-        echo "Unknown cache type"
-        exit 1
-    fi
-    for fetch_specfile in "${fetch_specfiles_cache[@]}"; do
-        raw_paths=$(PATH=${MY_SPACK_FOLDER}/bin:$PATH ${MY_SPACK_PYTHON} ${YASHCHIKI_HOME}/lib/yashchiki/specfile_storage_path.py ${fetch_specfile})
-        fetch_paths=(${raw_paths})
-        pushd ${YASHCHIKI_CACHES_ROOT}/download_cache
-        for fetch_path in "${fetch_paths[@]}"; do
-            # FIXME: gitlab env vars!
-            oras pull ${HARBOR_HOST}/${HARBOR_PROJECT}/source_cache:$(basename ${fetch_path}) 2>&1 && ret=$? || ret=$?
-            if [ ${ret} -ne 0 ]; then
-                missing_paths+=( "${fetch_path}" )
-            fi
-        done
-        popd
-    done
-    echo "Missing source cache entries: ${missing_paths[@]}"
-fi
diff --git a/share/yashchiki/utils/update_cached_sources.sh b/share/yashchiki/utils/update_cached_sources.sh
new file mode 100755
index 00000000..12b31be2
--- /dev/null
+++ b/share/yashchiki/utils/update_cached_sources.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+set -euo pipefail
+shopt -s inherit_errexit
+
+#head missing_paths.dat
+readarray -t missing_paths <missing_paths.dat
+#echo "\"${missing_paths[@]}\""
+
+if [ -n "${CACHE_SOURCE_TYPE:-}" ]; then
+    if [ ${CACHE_SOURCE_TYPE} != "oci" ]; then
+        echo "Unknown cache type"
+        exit 1
+    fi
+    pushd ${YASHCHIKI_CACHE_SOURCE} >/dev/null
+    if [ -n "${missing_paths}" ]; then
+        for missing_path in "${missing_paths[@]}"; do
+            if [ -z "${missing_path}" ]; then
+                echo "ERROR: missing_path is empty."
+                continue
+            fi
+            if [ ! -e "${missing_path}" ]; then
+                echo "ERROR: missing_path does not exist."
+                continue
+            fi
+            echo "Uploading to OCI cache: ${missing_path}"
+            oras push \
+                --username ${HARBOR_USERNAME} \
+                --password ${HARBOR_PASSWORD} \
+                --annotation="path=${missing_path}" \
+                ${HARBOR_HOST}/${HARBOR_PROJECT}/source_cache:$(basename ${missing_path}) \
+                ${missing_path} 2>&1 && ret=$? || ret=$?
+            if [ ${ret} -ne 0 ]; then
+                echo "Uploading of \"${missing_path}\" to OCI cache failed."
+            fi
+        done
+    fi
+    popd >/dev/null
+fi
-- 
GitLab