From b91cb89649c2cdb3cda1c12083e4dbeca27c8954 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 13:49:45 +0100 Subject: [PATCH 01/10] maintainers/scripts/update: Format with black --- maintainers/scripts/update.py | 260 ++++++++++++++++++++++++++-------- 1 file changed, 201 insertions(+), 59 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 7d0059db28a7..cb30063580b6 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -10,16 +10,20 @@ import subprocess import sys import tempfile + class CalledProcessError(Exception): process: asyncio.subprocess.Process stderr: Optional[bytes] + class UpdateFailedException(Exception): pass + def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) + async def check_subprocess_output(*args, **kwargs): """ Emulate check and capture_output arguments of subprocess.run function. @@ -38,26 +42,43 @@ async def check_subprocess_output(*args, **kwargs): return stdout -async def run_update_script(nixpkgs_root: str, merge_lock: asyncio.Lock, temp_dir: Optional[Tuple[str, str]], package: Dict, keep_going: bool): + +async def run_update_script( + nixpkgs_root: str, + merge_lock: asyncio.Lock, + temp_dir: Optional[Tuple[str, str]], + package: Dict, + keep_going: bool, +): worktree: Optional[str] = None - update_script_command = package['updateScript'] + update_script_command = package["updateScript"] if temp_dir is not None: worktree, _branch = temp_dir # Ensure the worktree is clean before update. - await check_subprocess_output('git', 'reset', '--hard', '--quiet', 'HEAD', cwd=worktree) + await check_subprocess_output( + "git", + "reset", + "--hard", + "--quiet", + "HEAD", + cwd=worktree, + ) # Update scripts can use $(dirname $0) to get their location but we want to run # their clones in the git worktree, not in the main nixpkgs repo. 
- update_script_command = map(lambda arg: re.sub(r'^{0}'.format(re.escape(nixpkgs_root)), worktree, arg), update_script_command) + update_script_command = map( + lambda arg: re.sub(r"^{0}".format(re.escape(nixpkgs_root)), worktree, arg), + update_script_command, + ) eprint(f" - {package['name']}: UPDATING ...") try: update_info = await check_subprocess_output( - 'env', + "env", f"UPDATE_NIX_NAME={package['name']}", f"UPDATE_NIX_PNAME={package['pname']}", f"UPDATE_NIX_OLD_VERSION={package['oldVersion']}", @@ -69,50 +90,72 @@ async def run_update_script(nixpkgs_root: str, merge_lock: asyncio.Lock, temp_di ) await merge_changes(merge_lock, package, update_info, temp_dir) except KeyboardInterrupt as e: - eprint('Cancelling…') + eprint("Cancelling…") raise asyncio.exceptions.CancelledError() except CalledProcessError as e: eprint(f" - {package['name']}: ERROR") eprint() eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") eprint() - eprint(e.stderr.decode('utf-8')) - with open(f"{package['pname']}.log", 'wb') as logfile: + eprint(e.stderr.decode("utf-8")) + with open(f"{package['pname']}.log", "wb") as logfile: logfile.write(e.stderr) eprint() eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") if not keep_going: - raise UpdateFailedException(f"The update script for {package['name']} failed with exit code {e.process.returncode}") + raise UpdateFailedException( + f"The update script for {package['name']} failed with exit code {e.process.returncode}" + ) + @contextlib.contextmanager def make_worktree() -> Generator[Tuple[str, str], None, None]: with tempfile.TemporaryDirectory() as wt: - branch_name = f'update-{os.path.basename(wt)}' - target_directory = f'{wt}/nixpkgs' + branch_name = f"update-{os.path.basename(wt)}" + target_directory = f"{wt}/nixpkgs" - subprocess.run(['git', 'worktree', 'add', '-b', branch_name, target_directory]) + subprocess.run(["git", "worktree", "add", "-b", branch_name, target_directory]) try: yield (target_directory, branch_name) finally: - subprocess.run(['git', 'worktree', 'remove', '--force', target_directory]) - subprocess.run(['git', 'branch', '-D', branch_name]) + subprocess.run(["git", "worktree", "remove", "--force", target_directory]) + subprocess.run(["git", "branch", "-D", branch_name]) -async def commit_changes(name: str, merge_lock: asyncio.Lock, worktree: str, branch: str, changes: List[Dict]) -> None: + +async def commit_changes( + name: str, + merge_lock: asyncio.Lock, + worktree: str, + branch: str, + changes: List[Dict], +) -> None: for change in changes: # Git can only handle a single index operation at a time async with merge_lock: - await check_subprocess_output('git', 'add', *change['files'], cwd=worktree) - commit_message = '{attrPath}: {oldVersion} -> {newVersion}'.format(**change) - if 'commitMessage' in change: - commit_message = change['commitMessage'] - elif 'commitBody' in change: - commit_message = commit_message + '\n\n' + change['commitBody'] - await check_subprocess_output('git', 'commit', '--quiet', '-m', commit_message, cwd=worktree) - await check_subprocess_output('git', 'cherry-pick', branch) + await check_subprocess_output("git", "add", *change["files"], cwd=worktree) + commit_message = "{attrPath}: {oldVersion} -> {newVersion}".format(**change) + if "commitMessage" in change: + commit_message = change["commitMessage"] + elif "commitBody" in change: + commit_message = commit_message + "\n\n" + change["commitBody"] + await check_subprocess_output( + "git", + "commit", + "--quiet", + 
"-m", + commit_message, + cwd=worktree, + ) + await check_subprocess_output("git", "cherry-pick", branch) -async def check_changes(package: Dict, worktree: str, update_info: str): - if 'commit' in package['supportedFeatures']: + +async def check_changes( + package: Dict, + worktree: str, + update_info: str, +): + if "commit" in package["supportedFeatures"]: changes = json.loads(update_info) else: changes = [{}] @@ -120,54 +163,93 @@ async def check_changes(package: Dict, worktree: str, update_info: str): # Try to fill in missing attributes when there is just a single change. if len(changes) == 1: # Dynamic data from updater take precedence over static data from passthru.updateScript. - if 'attrPath' not in changes[0]: + if "attrPath" not in changes[0]: # update.nix is always passing attrPath - changes[0]['attrPath'] = package['attrPath'] + changes[0]["attrPath"] = package["attrPath"] - if 'oldVersion' not in changes[0]: + if "oldVersion" not in changes[0]: # update.nix is always passing oldVersion - changes[0]['oldVersion'] = package['oldVersion'] + changes[0]["oldVersion"] = package["oldVersion"] - if 'newVersion' not in changes[0]: - attr_path = changes[0]['attrPath'] - obtain_new_version_output = await check_subprocess_output('nix-instantiate', '--expr', f'with import ./. {{}}; lib.getVersion {attr_path}', '--eval', '--strict', '--json', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=worktree) - changes[0]['newVersion'] = json.loads(obtain_new_version_output.decode('utf-8')) + if "newVersion" not in changes[0]: + attr_path = changes[0]["attrPath"] + obtain_new_version_output = await check_subprocess_output( + "nix-instantiate", + "--expr", + f"with import ./. {{}}; lib.getVersion {attr_path}", + "--eval", + "--strict", + "--json", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=worktree, + ) + changes[0]["newVersion"] = json.loads( + obtain_new_version_output.decode("utf-8") + ) - if 'files' not in changes[0]: - changed_files_output = await check_subprocess_output('git', 'diff', '--name-only', 'HEAD', stdout=asyncio.subprocess.PIPE, cwd=worktree) + if "files" not in changes[0]: + changed_files_output = await check_subprocess_output( + "git", + "diff", + "--name-only", + "HEAD", + stdout=asyncio.subprocess.PIPE, + cwd=worktree, + ) changed_files = changed_files_output.splitlines() - changes[0]['files'] = changed_files + changes[0]["files"] = changed_files if len(changed_files) == 0: return [] return changes -async def merge_changes(merge_lock: asyncio.Lock, package: Dict, update_info: str, temp_dir: Optional[Tuple[str, str]]) -> None: + +async def merge_changes( + merge_lock: asyncio.Lock, + package: Dict, + update_info: str, + temp_dir: Optional[Tuple[str, str]], +) -> None: if temp_dir is not None: worktree, branch = temp_dir changes = await check_changes(package, worktree, update_info) if len(changes) > 0: - await commit_changes(package['name'], merge_lock, worktree, branch, changes) + await commit_changes(package["name"], merge_lock, worktree, branch, changes) else: eprint(f" - {package['name']}: DONE, no changes.") else: eprint(f" - {package['name']}: DONE.") -async def updater(nixpkgs_root: str, temp_dir: Optional[Tuple[str, str]], merge_lock: asyncio.Lock, packages_to_update: asyncio.Queue[Optional[Dict]], keep_going: bool, commit: bool): + +async def updater( + nixpkgs_root: str, + temp_dir: Optional[Tuple[str, str]], + merge_lock: asyncio.Lock, + packages_to_update: asyncio.Queue[Optional[Dict]], + keep_going: bool, + commit: bool, 
+): while True: package = await packages_to_update.get() if package is None: # A sentinel received, we are done. return - if not ('commit' in package['supportedFeatures'] or 'attrPath' in package): + if not ("commit" in package["supportedFeatures"] or "attrPath" in package): temp_dir = None await run_update_script(nixpkgs_root, merge_lock, temp_dir, package, keep_going) -async def start_updates(max_workers: int, keep_going: bool, commit: bool, packages: List[Dict]): + +async def start_updates( + max_workers: int, + keep_going: bool, + commit: bool, + packages: List[Dict], +): merge_lock = asyncio.Lock() packages_to_update: asyncio.Queue[Optional[Dict]] = asyncio.Queue() @@ -177,8 +259,13 @@ async def start_updates(max_workers: int, keep_going: bool, commit: bool, packag # Do not create more workers than there are packages. num_workers = min(max_workers, len(packages)) - nixpkgs_root_output = await check_subprocess_output('git', 'rev-parse', '--show-toplevel', stdout=asyncio.subprocess.PIPE) - nixpkgs_root = nixpkgs_root_output.decode('utf-8').strip() + nixpkgs_root_output = await check_subprocess_output( + "git", + "rev-parse", + "--show-toplevel", + stdout=asyncio.subprocess.PIPE, + ) + nixpkgs_root = nixpkgs_root_output.decode("utf-8").strip() # Set up temporary directories when using auto-commit. for i in range(num_workers): @@ -196,7 +283,19 @@ async def start_updates(max_workers: int, keep_going: bool, commit: bool, packag # Prepare updater workers for each temp_dir directory. # At most `num_workers` instances of `run_update_script` will be running at one time. - updaters = asyncio.gather(*[updater(nixpkgs_root, temp_dir, merge_lock, packages_to_update, keep_going, commit) for temp_dir in temp_dirs]) + updaters = asyncio.gather( + *[ + updater( + nixpkgs_root, + temp_dir, + merge_lock, + packages_to_update, + keep_going, + commit, + ) + for temp_dir in temp_dirs + ] + ) try: # Start updater workers. 
@@ -210,43 +309,86 @@ async def start_updates(max_workers: int, keep_going: bool, commit: bool, packag eprint(e) sys.exit(1) -def main(max_workers: int, keep_going: bool, commit: bool, packages_path: str, skip_prompt: bool) -> None: + +def main( + max_workers: int, + keep_going: bool, + commit: bool, + packages_path: str, + skip_prompt: bool, +) -> None: with open(packages_path) as f: packages = json.load(f) eprint() - eprint('Going to be running update for following packages:') + eprint("Going to be running update for following packages:") for package in packages: eprint(f" - {package['name']}") eprint() - confirm = '' if skip_prompt else input('Press Enter key to continue...') + confirm = "" if skip_prompt else input("Press Enter key to continue...") - if confirm == '': + if confirm == "": eprint() - eprint('Running update for:') + eprint("Running update for:") asyncio.run(start_updates(max_workers, keep_going, commit, packages)) eprint() - eprint('Packages updated!') + eprint("Packages updated!") sys.exit() else: - eprint('Aborting!') + eprint("Aborting!") sys.exit(130) -parser = argparse.ArgumentParser(description='Update packages') -parser.add_argument('--max-workers', '-j', dest='max_workers', type=int, help='Number of updates to run concurrently', nargs='?', default=4) -parser.add_argument('--keep-going', '-k', dest='keep_going', action='store_true', help='Do not stop after first failure') -parser.add_argument('--commit', '-c', dest='commit', action='store_true', help='Commit the changes') -parser.add_argument('packages', help='JSON file containing the list of package names and their update scripts') -parser.add_argument('--skip-prompt', '-s', dest='skip_prompt', action='store_true', help='Do not stop for prompts') -if __name__ == '__main__': +parser = argparse.ArgumentParser(description="Update packages") +parser.add_argument( + "--max-workers", + "-j", + dest="max_workers", + type=int, + help="Number of updates to run concurrently", + nargs="?", + default=4, +) +parser.add_argument( + "--keep-going", + "-k", + dest="keep_going", + action="store_true", + help="Do not stop after first failure", +) +parser.add_argument( + "--commit", + "-c", + dest="commit", + action="store_true", + help="Commit the changes", +) +parser.add_argument( + "packages", + help="JSON file containing the list of package names and their update scripts", +) +parser.add_argument( + "--skip-prompt", + "-s", + dest="skip_prompt", + action="store_true", + help="Do not stop for prompts", +) + +if __name__ == "__main__": args = parser.parse_args() try: - main(args.max_workers, args.keep_going, args.commit, args.packages, args.skip_prompt) + main( + args.max_workers, + args.keep_going, + args.commit, + args.packages, + args.skip_prompt, + ) except KeyboardInterrupt as e: # Let’s cancel outside of the main loop too. 
sys.exit(130) From 81aed8eb80e9e4d540fb8c1c1ddddadf55ec6775 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 14:06:49 +0100 Subject: [PATCH 02/10] maintainers/scripts/update: Modernize types `list`, `dict` and `tuple` can accept generic arguments since Python 3.9: https://docs.python.org/3.9/whatsnew/3.9.html#type-hinting-generics-in-standard-collections `T | None` can be used instead of `Optional` since 3.10: https://docs.python.org/3.10/whatsnew/3.10.html#pep-604-new-type-union-operator --- maintainers/scripts/update.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index cb30063580b6..f885d9694a22 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -1,5 +1,4 @@ -from __future__ import annotations -from typing import Dict, Generator, List, Optional, Tuple +from typing import Generator import argparse import asyncio import contextlib @@ -13,7 +12,7 @@ import tempfile class CalledProcessError(Exception): process: asyncio.subprocess.Process - stderr: Optional[bytes] + stderr: bytes | None class UpdateFailedException(Exception): @@ -46,11 +45,11 @@ async def check_subprocess_output(*args, **kwargs): async def run_update_script( nixpkgs_root: str, merge_lock: asyncio.Lock, - temp_dir: Optional[Tuple[str, str]], - package: Dict, + temp_dir: tuple[str, str] | None, + package: dict, keep_going: bool, ): - worktree: Optional[str] = None + worktree: str | None = None update_script_command = package["updateScript"] @@ -110,7 +109,7 @@ async def run_update_script( @contextlib.contextmanager -def make_worktree() -> Generator[Tuple[str, str], None, None]: +def make_worktree() -> Generator[tuple[str, str], None, None]: with tempfile.TemporaryDirectory() as wt: branch_name = f"update-{os.path.basename(wt)}" target_directory = f"{wt}/nixpkgs" @@ -128,7 +127,7 @@ async def commit_changes( merge_lock: asyncio.Lock, worktree: str, branch: str, - changes: List[Dict], + changes: list[dict], ) -> None: for change in changes: # Git can only handle a single index operation at a time @@ -151,7 +150,7 @@ async def commit_changes( async def check_changes( - package: Dict, + package: dict, worktree: str, update_info: str, ): @@ -208,9 +207,9 @@ async def check_changes( async def merge_changes( merge_lock: asyncio.Lock, - package: Dict, + package: dict, update_info: str, - temp_dir: Optional[Tuple[str, str]], + temp_dir: tuple[str, str] | None, ) -> None: if temp_dir is not None: worktree, branch = temp_dir @@ -226,9 +225,9 @@ async def merge_changes( async def updater( nixpkgs_root: str, - temp_dir: Optional[Tuple[str, str]], + temp_dir: tuple[str, str] | None, merge_lock: asyncio.Lock, - packages_to_update: asyncio.Queue[Optional[Dict]], + packages_to_update: asyncio.Queue[dict | None], keep_going: bool, commit: bool, ): @@ -248,13 +247,13 @@ async def start_updates( max_workers: int, keep_going: bool, commit: bool, - packages: List[Dict], + packages: list[dict], ): merge_lock = asyncio.Lock() - packages_to_update: asyncio.Queue[Optional[Dict]] = asyncio.Queue() + packages_to_update: asyncio.Queue[dict | None] = asyncio.Queue() with contextlib.ExitStack() as stack: - temp_dirs: List[Optional[Tuple[str, str]]] = [] + temp_dirs: list[tuple[str, str] | None] = [] # Do not create more workers than there are packages. 
num_workers = min(max_workers, len(packages)) From 76f44542a82ceb22da72c7a320e600ddf04451bf Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 14:15:42 +0100 Subject: [PATCH 03/10] maintainers/scripts/update: Add missing type hints --- maintainers/scripts/update.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index f885d9694a22..f71ad25afb3a 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -1,4 +1,4 @@ -from typing import Generator +from typing import Any, Generator import argparse import asyncio import contextlib @@ -19,11 +19,11 @@ class UpdateFailedException(Exception): pass -def eprint(*args, **kwargs): +def eprint(*args: Any, **kwargs: Any) -> None: print(*args, file=sys.stderr, **kwargs) -async def check_subprocess_output(*args, **kwargs): +async def check_subprocess_output(*args: str, **kwargs: Any) -> bytes: """ Emulate check and capture_output arguments of subprocess.run function. """ @@ -48,7 +48,7 @@ async def run_update_script( temp_dir: tuple[str, str] | None, package: dict, keep_going: bool, -): +) -> None: worktree: str | None = None update_script_command = package["updateScript"] @@ -153,7 +153,7 @@ async def check_changes( package: dict, worktree: str, update_info: str, -): +) -> list[dict]: if "commit" in package["supportedFeatures"]: changes = json.loads(update_info) else: @@ -230,7 +230,7 @@ async def updater( packages_to_update: asyncio.Queue[dict | None], keep_going: bool, commit: bool, -): +) -> None: while True: package = await packages_to_update.get() if package is None: @@ -248,7 +248,7 @@ async def start_updates( keep_going: bool, commit: bool, packages: list[dict], -): +) -> None: merge_lock = asyncio.Lock() packages_to_update: asyncio.Queue[dict | None] = asyncio.Queue() From fdea10b43381f16701fd18863e992494559e2de6 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 14:16:23 +0100 Subject: [PATCH 04/10] maintainers/scripts/update: Fix update_info type This was revealed when we added return type to `check_subprocess_output`. --- maintainers/scripts/update.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index f71ad25afb3a..6111875c9fee 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -152,7 +152,7 @@ async def commit_changes( async def check_changes( package: dict, worktree: str, - update_info: str, + update_info: bytes, ) -> list[dict]: if "commit" in package["supportedFeatures"]: changes = json.loads(update_info) @@ -208,7 +208,7 @@ async def check_changes( async def merge_changes( merge_lock: asyncio.Lock, package: dict, - update_info: str, + update_info: bytes, temp_dir: tuple[str, str] | None, ) -> None: if temp_dir is not None: From e3772a9bcbad487151a8116d8996437c7c54a91a Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 16:32:46 +0100 Subject: [PATCH 05/10] maintainers/scripts/update: Mark queue jobs as done I believe this is only relevant if we were to `join` the queue itself but it is a good practice anyway. 
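As a minimal, self-contained illustration of the rendezvous that `task_done` enables (item names are made up, not part of this change): a producer can `await queue.join()` to block until a consumer has marked every enqueued item as done.

    import asyncio

    async def worker(queue: asyncio.Queue) -> None:
        while True:
            item = await queue.get()
            if item is None:
                # Sentinel: this worker is finished.
                return
            print("processing", item)
            # Mark the item as handled so that queue.join() can return.
            queue.task_done()

    async def main() -> None:
        queue: asyncio.Queue = asyncio.Queue()
        for item in ("a", "b", "c"):
            await queue.put(item)
        task = asyncio.create_task(worker(queue))
        # Returns once task_done() has been called for every item put above.
        await queue.join()
        await queue.put(None)  # release the worker
        await task

    asyncio.run(main())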
https://docs.python.org/3/library/asyncio-queue.html#asyncio.Queue.task_done --- maintainers/scripts/update.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 6111875c9fee..40fe70fb4c56 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -242,6 +242,8 @@ async def updater( await run_update_script(nixpkgs_root, merge_lock, temp_dir, package, keep_going) + packages_to_update.task_done() + async def start_updates( max_workers: int, From f54d20c3a1f8386fd98196444934fe773556cb79 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Mon, 3 Mar 2025 00:33:39 +0100 Subject: [PATCH 06/10] maintainers/scripts/update: Do not try to print error when there is not one This can happen e.g. due to an error during `merge_changes` since we do not do `stderr=PIPE` for git commands. --- maintainers/scripts/update.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 40fe70fb4c56..36a6c7218896 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -93,14 +93,19 @@ async def run_update_script( raise asyncio.exceptions.CancelledError() except CalledProcessError as e: eprint(f" - {package['name']}: ERROR") - eprint() - eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") - eprint() - eprint(e.stderr.decode("utf-8")) - with open(f"{package['pname']}.log", "wb") as logfile: - logfile.write(e.stderr) - eprint() - eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") + if e.stderr is not None: + eprint() + eprint( + f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------" + ) + eprint() + eprint(e.stderr.decode("utf-8")) + with open(f"{package['pname']}.log", "wb") as logfile: + logfile.write(e.stderr) + eprint() + eprint( + f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------" + ) if not keep_going: raise UpdateFailedException( From 6403eb8f4bcb54bd4411e4e2736b279072034e92 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Mon, 3 Mar 2025 00:03:21 +0100 Subject: [PATCH 07/10] _experimental-update-script-combinators.sequence: Loosen attrPath constraint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only allow a single value for `attrPath` across all sequenced update scripts. But previously `null` (representing `passthru.updateScript.attrPath` not being defined) was counted as one value. This would prevent us from explicitly specifying `attrPath` in `gnome.updateScript` in the next commit. Let’s ignore update scripts without specified `attrPath` attribute for the purpose of this check. --- pkgs/common-updater/combinators.nix | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pkgs/common-updater/combinators.nix b/pkgs/common-updater/combinators.nix index c4754848be87..eafc530ceaed 100644 --- a/pkgs/common-updater/combinators.nix +++ b/pkgs/common-updater/combinators.nix @@ -169,18 +169,21 @@ rec { assert lib.assertMsg (lib.all validateFeatures scripts) "Combining update scripts with features enabled (other than “silent” scripts and an optional single script with “commit”) is currently unsupported."; + assert lib.assertMsg ( builtins.length ( lib.unique ( - builtins.map ( - { - attrPath ? null, - ... - }: - attrPath - ) scripts + builtins.filter (attrPath: attrPath != null) ( + builtins.map ( + { + attrPath ? null, + ... 
+ }: + attrPath + ) scripts + ) ) - ) == 1 + ) <= 1 ) "Combining update scripts with different attr paths is currently unsupported."; { From f9d05fe6a156b0fbe729e18e9f49dfc85301ac01 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 23:29:52 +0100 Subject: [PATCH 08/10] gnome.updateScript: expose canonical attrPath Without this, `update.nix` will discover `libxml2` as `emscriptenPackages.libxml2`. But since that instantiates a different, and much less common derivation, it will be erroneously considered a leaf package in topological order of packages. --- pkgs/desktops/gnome/update.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/pkgs/desktops/gnome/update.nix b/pkgs/desktops/gnome/update.nix index d6b92b0e77cc..2500739a22db 100644 --- a/pkgs/desktops/gnome/update.nix +++ b/pkgs/desktops/gnome/update.nix @@ -108,4 +108,5 @@ in supportedFeatures = [ "commit" ]; + inherit attrPath; } From 1d2c1810eb0e2c72fabf3c239ee9b0576faaac17 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 23:12:05 +0100 Subject: [PATCH 09/10] maintainers/scripts/update: Prepare for ordered updates Just minor refactorings: - Extract `updater_tasks` into a variable. - Make `main` itself async. --- maintainers/scripts/update.py | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 36a6c7218896..62c2fb961f12 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -289,34 +289,36 @@ async def start_updates( # Prepare updater workers for each temp_dir directory. # At most `num_workers` instances of `run_update_script` will be running at one time. - updaters = asyncio.gather( - *[ - updater( - nixpkgs_root, - temp_dir, - merge_lock, - packages_to_update, - keep_going, - commit, - ) - for temp_dir in temp_dirs - ] + updater_tasks = [ + updater( + nixpkgs_root, + temp_dir, + merge_lock, + packages_to_update, + keep_going, + commit, + ) + for temp_dir in temp_dirs + ] + + tasks = asyncio.gather( + *updater_tasks, ) try: # Start updater workers. - await updaters + await tasks except asyncio.exceptions.CancelledError: # When one worker is cancelled, cancel the others too. - updaters.cancel() + tasks.cancel() except UpdateFailedException as e: # When one worker fails, cancel the others, as this exception is only thrown when keep_going is false. - updaters.cancel() + tasks.cancel() eprint(e) sys.exit(1) -def main( +async def main( max_workers: int, keep_going: bool, commit: bool, @@ -338,7 +340,7 @@ def main( eprint() eprint("Running update for:") - asyncio.run(start_updates(max_workers, keep_going, commit, packages)) + await start_updates(max_workers, keep_going, commit, packages) eprint() eprint("Packages updated!") @@ -388,12 +390,14 @@ if __name__ == "__main__": args = parser.parse_args() try: - main( - args.max_workers, - args.keep_going, - args.commit, - args.packages, - args.skip_prompt, + asyncio.run( + main( + args.max_workers, + args.keep_going, + args.commit, + args.packages, + args.skip_prompt, + ) ) except KeyboardInterrupt as e: # Let’s cancel outside of the main loop too. 
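For context on the machinery being reshuffled here, a minimal standalone sketch of the sentinel-terminated worker pool that `start_updates` sets up (worker and package names are illustrative): the queue receives one `None` per worker, so every worker eventually drains the queue and exits.

    import asyncio

    async def worker(name: str, queue: asyncio.Queue) -> None:
        while True:
            item = await queue.get()
            if item is None:
                # One sentinel per worker: nothing left to do.
                return
            print(name, "updating", item)

    async def main() -> None:
        queue: asyncio.Queue = asyncio.Queue()
        for package in ("glib", "gtk4", "gnome-text-editor"):
            await queue.put(package)

        num_workers = 2
        # Enqueue one sentinel per worker after the real work items.
        for _ in range(num_workers):
            await queue.put(None)

        await asyncio.gather(
            *(worker(f"worker-{i}", queue) for i in range(num_workers))
        )

    asyncio.run(main())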
From ce96c797795486d2a9fb4aa9f8f1ad231e43bd61 Mon Sep 17 00:00:00 2001 From: Jan Tojnar Date: Sun, 2 Mar 2025 17:33:18 +0100 Subject: [PATCH 10/10] maintainers/scripts/update: Allow updating in (reverse) topological order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when updating multiple packages, we just updated them in arbitrary order. However, when some of those packages depended on each other, it could happen that some of the intermediary commits would not build because of version constraints on dependencies. If we want each commit in the history to build when feasible, we need to consider four different scenarios: 1. Updated dependant is compatible with both the old and the new version of the dependency. Order of commits does not matter. But updating dependents first (i.e. reverse topological order) is useful since it allows building each package on the commit that updates it with minimal rebuilds. 2. Updated dependant raises the minimal dependency version. Dependency needs to be updated first (i.e. topological order). 3. Old dependant sets the maximal dependency version. Dependant needs to be updated first (i.e. reverse topological order). 4. Updated dependant depends on exact version of dependency and they are expected to be updated in lockstep. The earlier commit will be broken no matter the order. This change allows selecting the order of updates to facilitate the first three scenarios. Since most package sets only have loose version constraints, the reverse topological order will generally be the most convenient. In major package set updates like bumping GNOME release, there will be exceptions (e.g. libadwaita typically requires GTK 4 from the same release) but those were probably in broken order before as well. The downside of this feature is that it is quite slow – it requires instantiating each package and then querying Nix store for requisites. It may also fail to detect dependency if there are multiple variants of the package and dependant uses a different one than the canonical one. Testing with: env GNOME_UPDATE_STABILITY=unstable NIX_PATH=nixpkgs=$HOME/Projects/nixpkgs nix-shell maintainers/scripts/update.nix --arg predicate '(path: pkg: path == ["gnome-shell"] || path == ["mutter"] || path == ["glib"] || path == ["gtk3"] || path == ["pango"] || path == ["gnome-text-editor"])' --argstr order reverse-topological --argstr commit true --argstr max-workers 4 --- maintainers/scripts/update.nix | 16 ++- maintainers/scripts/update.py | 219 +++++++++++++++++++++++++++++++-- 2 files changed, 222 insertions(+), 13 deletions(-) diff --git a/maintainers/scripts/update.nix b/maintainers/scripts/update.nix index 05fcd6f12d2e..a45d2446f124 100644 --- a/maintainers/scripts/update.nix +++ b/maintainers/scripts/update.nix @@ -16,6 +16,7 @@ keep-going ? null, commit ? null, skip-prompt ? null, + order ? null, }: let @@ -217,6 +218,18 @@ let to skip prompt: --argstr skip-prompt true + + By default, the updater will update the packages in arbitrary order. Alternately, you can force a specific order based on the packages’ dependency relations: + + - Reverse topological order (e.g. {"gnome-text-editor", "gimp"}, {"gtk3", "gtk4"}, {"glib"}) is useful when you want checkout each commit one by one to build each package individually but some of the packages to be updated would cause a mass rebuild for the others. Of course, this requires that none of the updated dependents require a new version of the dependency. 
+ + --argstr order reverse-topological + + - Topological order (e.g. {"glib"}, {"gtk3", "gtk4"}, {"gnome-text-editor", "gimp"}) is useful when the updated dependents require a new version of updated dependency. + + --argstr order topological + + Note that sorting requires instantiating each package and then querying Nix store for requisites so it will be pretty slow with large number of packages. ''; # Transform a matched package into an object for update.py. @@ -241,7 +254,8 @@ let lib.optional (max-workers != null) "--max-workers=${max-workers}" ++ lib.optional (keep-going == "true") "--keep-going" ++ lib.optional (commit == "true") "--commit" - ++ lib.optional (skip-prompt == "true") "--skip-prompt"; + ++ lib.optional (skip-prompt == "true") "--skip-prompt" + ++ lib.optional (order != null) "--order=${order}"; args = [ packagesJson ] ++ optionalArgs; diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 62c2fb961f12..cfa051087ae5 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -1,4 +1,6 @@ -from typing import Any, Generator +from graphlib import TopologicalSorter +from pathlib import Path +from typing import Any, Generator, Literal import argparse import asyncio import contextlib @@ -10,6 +12,9 @@ import sys import tempfile +Order = Literal["arbitrary", "reverse-topological", "topological"] + + class CalledProcessError(Exception): process: asyncio.subprocess.Process stderr: bytes | None @@ -42,6 +47,145 @@ async def check_subprocess_output(*args: str, **kwargs: Any) -> bytes: return stdout +async def nix_instantiate(attr_path: str) -> Path: + out = await check_subprocess_output( + "nix-instantiate", + "-A", + attr_path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + drv = out.decode("utf-8").strip().split("!", 1)[0] + + return Path(drv) + + +async def nix_query_requisites(drv: Path) -> list[Path]: + requisites = await check_subprocess_output( + "nix-store", + "--query", + "--requisites", + str(drv), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + drv_str = str(drv) + + return [ + Path(requisite) + for requisite in requisites.decode("utf-8").splitlines() + # Avoid self-loops. + if requisite != drv_str + ] + + +async def attr_instantiation_worker( + semaphore: asyncio.Semaphore, + attr_path: str, +) -> tuple[Path, str]: + async with semaphore: + eprint(f"Instantiating {attr_path}…") + return (await nix_instantiate(attr_path), attr_path) + + +async def requisites_worker( + semaphore: asyncio.Semaphore, + drv: Path, +) -> tuple[Path, list[Path]]: + async with semaphore: + eprint(f"Obtaining requisites for {drv}…") + return (drv, await nix_query_requisites(drv)) + + +def requisites_to_attrs( + drv_attr_paths: dict[Path, str], + requisites: list[Path], +) -> set[str]: + """ + Converts a set of requisite `.drv`s to a set of attribute paths. + Derivations that do not correspond to any of the packages we want to update will be discarded. + """ + return { + drv_attr_paths[requisite] + for requisite in requisites + if requisite in drv_attr_paths + } + + +def reverse_edges(graph: dict[str, set[str]]) -> dict[str, set[str]]: + """ + Flips the edges of a directed graph. 
+ """ + + reversed_graph: dict[str, set[str]] = {} + for dependent, dependencies in graph.items(): + for dependency in dependencies: + reversed_graph.setdefault(dependency, set()).add(dependent) + + return reversed_graph + + +def get_independent_sorter( + packages: list[dict], +) -> TopologicalSorter[str]: + """ + Returns a sorter which treats all packages as independent, + which will allow them to be updated in parallel. + """ + + attr_deps: dict[str, set[str]] = { + package["attrPath"]: set() for package in packages + } + sorter = TopologicalSorter(attr_deps) + sorter.prepare() + + return sorter + + +async def get_topological_sorter( + max_workers: int, + packages: list[dict], + reverse_order: bool, +) -> tuple[TopologicalSorter[str], list[dict]]: + """ + Returns a sorter which returns packages in topological or reverse topological order, + which will ensure a package is updated before or after its dependencies, respectively. + """ + + semaphore = asyncio.Semaphore(max_workers) + + drv_attr_paths = dict( + await asyncio.gather( + *( + attr_instantiation_worker(semaphore, package["attrPath"]) + for package in packages + ) + ) + ) + + drv_requisites = await asyncio.gather( + *(requisites_worker(semaphore, drv) for drv in drv_attr_paths.keys()) + ) + + attr_deps = { + drv_attr_paths[drv]: requisites_to_attrs(drv_attr_paths, requisites) + for drv, requisites in drv_requisites + } + + if reverse_order: + attr_deps = reverse_edges(attr_deps) + + # Adjust packages order based on the topological one + ordered = list(TopologicalSorter(attr_deps).static_order()) + packages = sorted(packages, key=lambda package: ordered.index(package["attrPath"])) + + sorter = TopologicalSorter(attr_deps) + sorter.prepare() + + return sorter, packages + + async def run_update_script( nixpkgs_root: str, merge_lock: asyncio.Lock, @@ -250,11 +394,41 @@ async def updater( packages_to_update.task_done() +async def populate_queue( + attr_packages: dict[str, dict], + sorter: TopologicalSorter[str], + packages_to_update: asyncio.Queue[dict | None], + num_workers: int, +) -> None: + """ + Keeps populating the queue with packages that can be updated + according to ordering requirements. If topological order + is used, the packages will appear in waves, as packages with + no dependencies are processed and removed from the sorter. + With `order="none"`, all packages will be enqueued simultaneously. + """ + + # Fill up an update queue, + while sorter.is_active(): + ready_packages = list(sorter.get_ready()) + eprint(f"Enqueuing group of {len(ready_packages)} packages") + for package in ready_packages: + await packages_to_update.put(attr_packages[package]) + await packages_to_update.join() + sorter.done(*ready_packages) + + # Add sentinels, one for each worker. + # A worker will terminate when it gets a sentinel from the queue. + for i in range(num_workers): + await packages_to_update.put(None) + + async def start_updates( max_workers: int, keep_going: bool, commit: bool, - packages: list[dict], + attr_packages: dict[str, dict], + sorter: TopologicalSorter[str], ) -> None: merge_lock = asyncio.Lock() packages_to_update: asyncio.Queue[dict | None] = asyncio.Queue() @@ -263,7 +437,7 @@ async def start_updates( temp_dirs: list[tuple[str, str] | None] = [] # Do not create more workers than there are packages. 
- num_workers = min(max_workers, len(packages)) + num_workers = min(max_workers, len(attr_packages)) nixpkgs_root_output = await check_subprocess_output( "git", @@ -278,14 +452,12 @@ async def start_updates( temp_dir = stack.enter_context(make_worktree()) if commit else None temp_dirs.append(temp_dir) - # Fill up an update queue, - for package in packages: - await packages_to_update.put(package) - - # Add sentinels, one for each worker. - # A workers will terminate when it gets sentinel from the queue. - for i in range(num_workers): - await packages_to_update.put(None) + queue_task = populate_queue( + attr_packages, + sorter, + packages_to_update, + num_workers, + ) # Prepare updater workers for each temp_dir directory. # At most `num_workers` instances of `run_update_script` will be running at one time. @@ -303,6 +475,7 @@ async def start_updates( tasks = asyncio.gather( *updater_tasks, + queue_task, ) try: @@ -324,10 +497,24 @@ async def main( commit: bool, packages_path: str, skip_prompt: bool, + order: Order, ) -> None: with open(packages_path) as f: packages = json.load(f) + if order != "arbitrary": + eprint("Sorting packages…") + reverse_order = order == "reverse-topological" + sorter, packages = await get_topological_sorter( + max_workers, + packages, + reverse_order, + ) + else: + sorter = get_independent_sorter(packages) + + attr_packages = {package["attrPath"]: package for package in packages} + eprint() eprint("Going to be running update for following packages:") for package in packages: @@ -340,7 +527,7 @@ async def main( eprint() eprint("Running update for:") - await start_updates(max_workers, keep_going, commit, packages) + await start_updates(max_workers, keep_going, commit, attr_packages, sorter) eprint() eprint("Packages updated!") @@ -374,6 +561,13 @@ parser.add_argument( action="store_true", help="Commit the changes", ) +parser.add_argument( + "--order", + dest="order", + default="arbitrary", + choices=["arbitrary", "reverse-topological", "topological"], + help="Sort the packages based on dependency relation", +) parser.add_argument( "packages", help="JSON file containing the list of package names and their update scripts", @@ -397,6 +591,7 @@ if __name__ == "__main__": args.commit, args.packages, args.skip_prompt, + args.order, ) ) except KeyboardInterrupt as e:
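To make the scheduling in `populate_queue` concrete, here is a minimal, self-contained sketch of the `prepare()`/`get_ready()`/`done()` cycle it drives, using a small dependency graph modelled on the update.nix examples (package → set of its dependencies, the same shape as `attr_deps`):

    from graphlib import TopologicalSorter

    # Package -> set of its dependencies.
    deps = {
        "gnome-text-editor": {"gtk4"},
        "gimp": {"gtk3"},
        "gtk4": {"glib"},
        "gtk3": {"glib"},
        "glib": set(),
    }

    sorter = TopologicalSorter(deps)
    sorter.prepare()
    while sorter.is_active():
        wave = list(sorter.get_ready())
        print(sorted(wave))  # packages that can be updated in parallel
        sorter.done(*wave)

This prints the topological waves ['glib'], ['gtk3', 'gtk4'], ['gimp', 'gnome-text-editor']: dependencies first. When --order=reverse-topological is requested, `get_topological_sorter` first passes the graph through `reverse_edges`, flipping the arrows so the waves come out in the opposite order.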