diff --git a/maintainers/scripts/update.nix b/maintainers/scripts/update.nix index 05fcd6f12d2e..a45d2446f124 100644 --- a/maintainers/scripts/update.nix +++ b/maintainers/scripts/update.nix @@ -16,6 +16,7 @@ keep-going ? null, commit ? null, skip-prompt ? null, + order ? null, }: let @@ -217,6 +218,18 @@ let to skip prompt: --argstr skip-prompt true + + By default, the updater will update the packages in arbitrary order. Alternately, you can force a specific order based on the packages’ dependency relations: + + - Reverse topological order (e.g. {"gnome-text-editor", "gimp"}, {"gtk3", "gtk4"}, {"glib"}) is useful when you want checkout each commit one by one to build each package individually but some of the packages to be updated would cause a mass rebuild for the others. Of course, this requires that none of the updated dependents require a new version of the dependency. + + --argstr order reverse-topological + + - Topological order (e.g. {"glib"}, {"gtk3", "gtk4"}, {"gnome-text-editor", "gimp"}) is useful when the updated dependents require a new version of updated dependency. + + --argstr order topological + + Note that sorting requires instantiating each package and then querying Nix store for requisites so it will be pretty slow with large number of packages. ''; # Transform a matched package into an object for update.py. @@ -241,7 +254,8 @@ let lib.optional (max-workers != null) "--max-workers=${max-workers}" ++ lib.optional (keep-going == "true") "--keep-going" ++ lib.optional (commit == "true") "--commit" - ++ lib.optional (skip-prompt == "true") "--skip-prompt"; + ++ lib.optional (skip-prompt == "true") "--skip-prompt" + ++ lib.optional (order != null) "--order=${order}"; args = [ packagesJson ] ++ optionalArgs; diff --git a/maintainers/scripts/update.py b/maintainers/scripts/update.py index 7d0059db28a7..cfa051087ae5 100644 --- a/maintainers/scripts/update.py +++ b/maintainers/scripts/update.py @@ -1,5 +1,6 @@ -from __future__ import annotations -from typing import Dict, Generator, List, Optional, Tuple +from graphlib import TopologicalSorter +from pathlib import Path +from typing import Any, Generator, Literal import argparse import asyncio import contextlib @@ -10,17 +11,24 @@ import subprocess import sys import tempfile + +Order = Literal["arbitrary", "reverse-topological", "topological"] + + class CalledProcessError(Exception): process: asyncio.subprocess.Process - stderr: Optional[bytes] + stderr: bytes | None + class UpdateFailedException(Exception): pass -def eprint(*args, **kwargs): + +def eprint(*args: Any, **kwargs: Any) -> None: print(*args, file=sys.stderr, **kwargs) -async def check_subprocess_output(*args, **kwargs): + +async def check_subprocess_output(*args: str, **kwargs: Any) -> bytes: """ Emulate check and capture_output arguments of subprocess.run function. """ @@ -38,26 +46,182 @@ async def check_subprocess_output(*args, **kwargs): return stdout -async def run_update_script(nixpkgs_root: str, merge_lock: asyncio.Lock, temp_dir: Optional[Tuple[str, str]], package: Dict, keep_going: bool): - worktree: Optional[str] = None - update_script_command = package['updateScript'] +async def nix_instantiate(attr_path: str) -> Path: + out = await check_subprocess_output( + "nix-instantiate", + "-A", + attr_path, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + drv = out.decode("utf-8").strip().split("!", 1)[0] + + return Path(drv) + + +async def nix_query_requisites(drv: Path) -> list[Path]: + requisites = await check_subprocess_output( + "nix-store", + "--query", + "--requisites", + str(drv), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + drv_str = str(drv) + + return [ + Path(requisite) + for requisite in requisites.decode("utf-8").splitlines() + # Avoid self-loops. + if requisite != drv_str + ] + + +async def attr_instantiation_worker( + semaphore: asyncio.Semaphore, + attr_path: str, +) -> tuple[Path, str]: + async with semaphore: + eprint(f"Instantiating {attr_path}…") + return (await nix_instantiate(attr_path), attr_path) + + +async def requisites_worker( + semaphore: asyncio.Semaphore, + drv: Path, +) -> tuple[Path, list[Path]]: + async with semaphore: + eprint(f"Obtaining requisites for {drv}…") + return (drv, await nix_query_requisites(drv)) + + +def requisites_to_attrs( + drv_attr_paths: dict[Path, str], + requisites: list[Path], +) -> set[str]: + """ + Converts a set of requisite `.drv`s to a set of attribute paths. + Derivations that do not correspond to any of the packages we want to update will be discarded. + """ + return { + drv_attr_paths[requisite] + for requisite in requisites + if requisite in drv_attr_paths + } + + +def reverse_edges(graph: dict[str, set[str]]) -> dict[str, set[str]]: + """ + Flips the edges of a directed graph. + """ + + reversed_graph: dict[str, set[str]] = {} + for dependent, dependencies in graph.items(): + for dependency in dependencies: + reversed_graph.setdefault(dependency, set()).add(dependent) + + return reversed_graph + + +def get_independent_sorter( + packages: list[dict], +) -> TopologicalSorter[str]: + """ + Returns a sorter which treats all packages as independent, + which will allow them to be updated in parallel. + """ + + attr_deps: dict[str, set[str]] = { + package["attrPath"]: set() for package in packages + } + sorter = TopologicalSorter(attr_deps) + sorter.prepare() + + return sorter + + +async def get_topological_sorter( + max_workers: int, + packages: list[dict], + reverse_order: bool, +) -> tuple[TopologicalSorter[str], list[dict]]: + """ + Returns a sorter which returns packages in topological or reverse topological order, + which will ensure a package is updated before or after its dependencies, respectively. + """ + + semaphore = asyncio.Semaphore(max_workers) + + drv_attr_paths = dict( + await asyncio.gather( + *( + attr_instantiation_worker(semaphore, package["attrPath"]) + for package in packages + ) + ) + ) + + drv_requisites = await asyncio.gather( + *(requisites_worker(semaphore, drv) for drv in drv_attr_paths.keys()) + ) + + attr_deps = { + drv_attr_paths[drv]: requisites_to_attrs(drv_attr_paths, requisites) + for drv, requisites in drv_requisites + } + + if reverse_order: + attr_deps = reverse_edges(attr_deps) + + # Adjust packages order based on the topological one + ordered = list(TopologicalSorter(attr_deps).static_order()) + packages = sorted(packages, key=lambda package: ordered.index(package["attrPath"])) + + sorter = TopologicalSorter(attr_deps) + sorter.prepare() + + return sorter, packages + + +async def run_update_script( + nixpkgs_root: str, + merge_lock: asyncio.Lock, + temp_dir: tuple[str, str] | None, + package: dict, + keep_going: bool, +) -> None: + worktree: str | None = None + + update_script_command = package["updateScript"] if temp_dir is not None: worktree, _branch = temp_dir # Ensure the worktree is clean before update. - await check_subprocess_output('git', 'reset', '--hard', '--quiet', 'HEAD', cwd=worktree) + await check_subprocess_output( + "git", + "reset", + "--hard", + "--quiet", + "HEAD", + cwd=worktree, + ) # Update scripts can use $(dirname $0) to get their location but we want to run # their clones in the git worktree, not in the main nixpkgs repo. - update_script_command = map(lambda arg: re.sub(r'^{0}'.format(re.escape(nixpkgs_root)), worktree, arg), update_script_command) + update_script_command = map( + lambda arg: re.sub(r"^{0}".format(re.escape(nixpkgs_root)), worktree, arg), + update_script_command, + ) eprint(f" - {package['name']}: UPDATING ...") try: update_info = await check_subprocess_output( - 'env', + "env", f"UPDATE_NIX_NAME={package['name']}", f"UPDATE_NIX_PNAME={package['pname']}", f"UPDATE_NIX_OLD_VERSION={package['oldVersion']}", @@ -69,50 +233,77 @@ async def run_update_script(nixpkgs_root: str, merge_lock: asyncio.Lock, temp_di ) await merge_changes(merge_lock, package, update_info, temp_dir) except KeyboardInterrupt as e: - eprint('Cancelling…') + eprint("Cancelling…") raise asyncio.exceptions.CancelledError() except CalledProcessError as e: eprint(f" - {package['name']}: ERROR") - eprint() - eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") - eprint() - eprint(e.stderr.decode('utf-8')) - with open(f"{package['pname']}.log", 'wb') as logfile: - logfile.write(e.stderr) - eprint() - eprint(f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------") + if e.stderr is not None: + eprint() + eprint( + f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------" + ) + eprint() + eprint(e.stderr.decode("utf-8")) + with open(f"{package['pname']}.log", "wb") as logfile: + logfile.write(e.stderr) + eprint() + eprint( + f"--- SHOWING ERROR LOG FOR {package['name']} ----------------------" + ) if not keep_going: - raise UpdateFailedException(f"The update script for {package['name']} failed with exit code {e.process.returncode}") + raise UpdateFailedException( + f"The update script for {package['name']} failed with exit code {e.process.returncode}" + ) + @contextlib.contextmanager -def make_worktree() -> Generator[Tuple[str, str], None, None]: +def make_worktree() -> Generator[tuple[str, str], None, None]: with tempfile.TemporaryDirectory() as wt: - branch_name = f'update-{os.path.basename(wt)}' - target_directory = f'{wt}/nixpkgs' + branch_name = f"update-{os.path.basename(wt)}" + target_directory = f"{wt}/nixpkgs" - subprocess.run(['git', 'worktree', 'add', '-b', branch_name, target_directory]) + subprocess.run(["git", "worktree", "add", "-b", branch_name, target_directory]) try: yield (target_directory, branch_name) finally: - subprocess.run(['git', 'worktree', 'remove', '--force', target_directory]) - subprocess.run(['git', 'branch', '-D', branch_name]) + subprocess.run(["git", "worktree", "remove", "--force", target_directory]) + subprocess.run(["git", "branch", "-D", branch_name]) -async def commit_changes(name: str, merge_lock: asyncio.Lock, worktree: str, branch: str, changes: List[Dict]) -> None: + +async def commit_changes( + name: str, + merge_lock: asyncio.Lock, + worktree: str, + branch: str, + changes: list[dict], +) -> None: for change in changes: # Git can only handle a single index operation at a time async with merge_lock: - await check_subprocess_output('git', 'add', *change['files'], cwd=worktree) - commit_message = '{attrPath}: {oldVersion} -> {newVersion}'.format(**change) - if 'commitMessage' in change: - commit_message = change['commitMessage'] - elif 'commitBody' in change: - commit_message = commit_message + '\n\n' + change['commitBody'] - await check_subprocess_output('git', 'commit', '--quiet', '-m', commit_message, cwd=worktree) - await check_subprocess_output('git', 'cherry-pick', branch) + await check_subprocess_output("git", "add", *change["files"], cwd=worktree) + commit_message = "{attrPath}: {oldVersion} -> {newVersion}".format(**change) + if "commitMessage" in change: + commit_message = change["commitMessage"] + elif "commitBody" in change: + commit_message = commit_message + "\n\n" + change["commitBody"] + await check_subprocess_output( + "git", + "commit", + "--quiet", + "-m", + commit_message, + cwd=worktree, + ) + await check_subprocess_output("git", "cherry-pick", branch) -async def check_changes(package: Dict, worktree: str, update_info: str): - if 'commit' in package['supportedFeatures']: + +async def check_changes( + package: dict, + worktree: str, + update_info: bytes, +) -> list[dict]: + if "commit" in package["supportedFeatures"]: changes = json.loads(update_info) else: changes = [{}] @@ -120,133 +311,289 @@ async def check_changes(package: Dict, worktree: str, update_info: str): # Try to fill in missing attributes when there is just a single change. if len(changes) == 1: # Dynamic data from updater take precedence over static data from passthru.updateScript. - if 'attrPath' not in changes[0]: + if "attrPath" not in changes[0]: # update.nix is always passing attrPath - changes[0]['attrPath'] = package['attrPath'] + changes[0]["attrPath"] = package["attrPath"] - if 'oldVersion' not in changes[0]: + if "oldVersion" not in changes[0]: # update.nix is always passing oldVersion - changes[0]['oldVersion'] = package['oldVersion'] + changes[0]["oldVersion"] = package["oldVersion"] - if 'newVersion' not in changes[0]: - attr_path = changes[0]['attrPath'] - obtain_new_version_output = await check_subprocess_output('nix-instantiate', '--expr', f'with import ./. {{}}; lib.getVersion {attr_path}', '--eval', '--strict', '--json', stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=worktree) - changes[0]['newVersion'] = json.loads(obtain_new_version_output.decode('utf-8')) + if "newVersion" not in changes[0]: + attr_path = changes[0]["attrPath"] + obtain_new_version_output = await check_subprocess_output( + "nix-instantiate", + "--expr", + f"with import ./. {{}}; lib.getVersion {attr_path}", + "--eval", + "--strict", + "--json", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=worktree, + ) + changes[0]["newVersion"] = json.loads( + obtain_new_version_output.decode("utf-8") + ) - if 'files' not in changes[0]: - changed_files_output = await check_subprocess_output('git', 'diff', '--name-only', 'HEAD', stdout=asyncio.subprocess.PIPE, cwd=worktree) + if "files" not in changes[0]: + changed_files_output = await check_subprocess_output( + "git", + "diff", + "--name-only", + "HEAD", + stdout=asyncio.subprocess.PIPE, + cwd=worktree, + ) changed_files = changed_files_output.splitlines() - changes[0]['files'] = changed_files + changes[0]["files"] = changed_files if len(changed_files) == 0: return [] return changes -async def merge_changes(merge_lock: asyncio.Lock, package: Dict, update_info: str, temp_dir: Optional[Tuple[str, str]]) -> None: + +async def merge_changes( + merge_lock: asyncio.Lock, + package: dict, + update_info: bytes, + temp_dir: tuple[str, str] | None, +) -> None: if temp_dir is not None: worktree, branch = temp_dir changes = await check_changes(package, worktree, update_info) if len(changes) > 0: - await commit_changes(package['name'], merge_lock, worktree, branch, changes) + await commit_changes(package["name"], merge_lock, worktree, branch, changes) else: eprint(f" - {package['name']}: DONE, no changes.") else: eprint(f" - {package['name']}: DONE.") -async def updater(nixpkgs_root: str, temp_dir: Optional[Tuple[str, str]], merge_lock: asyncio.Lock, packages_to_update: asyncio.Queue[Optional[Dict]], keep_going: bool, commit: bool): + +async def updater( + nixpkgs_root: str, + temp_dir: tuple[str, str] | None, + merge_lock: asyncio.Lock, + packages_to_update: asyncio.Queue[dict | None], + keep_going: bool, + commit: bool, +) -> None: while True: package = await packages_to_update.get() if package is None: # A sentinel received, we are done. return - if not ('commit' in package['supportedFeatures'] or 'attrPath' in package): + if not ("commit" in package["supportedFeatures"] or "attrPath" in package): temp_dir = None await run_update_script(nixpkgs_root, merge_lock, temp_dir, package, keep_going) -async def start_updates(max_workers: int, keep_going: bool, commit: bool, packages: List[Dict]): + packages_to_update.task_done() + + +async def populate_queue( + attr_packages: dict[str, dict], + sorter: TopologicalSorter[str], + packages_to_update: asyncio.Queue[dict | None], + num_workers: int, +) -> None: + """ + Keeps populating the queue with packages that can be updated + according to ordering requirements. If topological order + is used, the packages will appear in waves, as packages with + no dependencies are processed and removed from the sorter. + With `order="none"`, all packages will be enqueued simultaneously. + """ + + # Fill up an update queue, + while sorter.is_active(): + ready_packages = list(sorter.get_ready()) + eprint(f"Enqueuing group of {len(ready_packages)} packages") + for package in ready_packages: + await packages_to_update.put(attr_packages[package]) + await packages_to_update.join() + sorter.done(*ready_packages) + + # Add sentinels, one for each worker. + # A worker will terminate when it gets a sentinel from the queue. + for i in range(num_workers): + await packages_to_update.put(None) + + +async def start_updates( + max_workers: int, + keep_going: bool, + commit: bool, + attr_packages: dict[str, dict], + sorter: TopologicalSorter[str], +) -> None: merge_lock = asyncio.Lock() - packages_to_update: asyncio.Queue[Optional[Dict]] = asyncio.Queue() + packages_to_update: asyncio.Queue[dict | None] = asyncio.Queue() with contextlib.ExitStack() as stack: - temp_dirs: List[Optional[Tuple[str, str]]] = [] + temp_dirs: list[tuple[str, str] | None] = [] # Do not create more workers than there are packages. - num_workers = min(max_workers, len(packages)) + num_workers = min(max_workers, len(attr_packages)) - nixpkgs_root_output = await check_subprocess_output('git', 'rev-parse', '--show-toplevel', stdout=asyncio.subprocess.PIPE) - nixpkgs_root = nixpkgs_root_output.decode('utf-8').strip() + nixpkgs_root_output = await check_subprocess_output( + "git", + "rev-parse", + "--show-toplevel", + stdout=asyncio.subprocess.PIPE, + ) + nixpkgs_root = nixpkgs_root_output.decode("utf-8").strip() # Set up temporary directories when using auto-commit. for i in range(num_workers): temp_dir = stack.enter_context(make_worktree()) if commit else None temp_dirs.append(temp_dir) - # Fill up an update queue, - for package in packages: - await packages_to_update.put(package) - - # Add sentinels, one for each worker. - # A workers will terminate when it gets sentinel from the queue. - for i in range(num_workers): - await packages_to_update.put(None) + queue_task = populate_queue( + attr_packages, + sorter, + packages_to_update, + num_workers, + ) # Prepare updater workers for each temp_dir directory. # At most `num_workers` instances of `run_update_script` will be running at one time. - updaters = asyncio.gather(*[updater(nixpkgs_root, temp_dir, merge_lock, packages_to_update, keep_going, commit) for temp_dir in temp_dirs]) + updater_tasks = [ + updater( + nixpkgs_root, + temp_dir, + merge_lock, + packages_to_update, + keep_going, + commit, + ) + for temp_dir in temp_dirs + ] + + tasks = asyncio.gather( + *updater_tasks, + queue_task, + ) try: # Start updater workers. - await updaters + await tasks except asyncio.exceptions.CancelledError: # When one worker is cancelled, cancel the others too. - updaters.cancel() + tasks.cancel() except UpdateFailedException as e: # When one worker fails, cancel the others, as this exception is only thrown when keep_going is false. - updaters.cancel() + tasks.cancel() eprint(e) sys.exit(1) -def main(max_workers: int, keep_going: bool, commit: bool, packages_path: str, skip_prompt: bool) -> None: + +async def main( + max_workers: int, + keep_going: bool, + commit: bool, + packages_path: str, + skip_prompt: bool, + order: Order, +) -> None: with open(packages_path) as f: packages = json.load(f) + if order != "arbitrary": + eprint("Sorting packages…") + reverse_order = order == "reverse-topological" + sorter, packages = await get_topological_sorter( + max_workers, + packages, + reverse_order, + ) + else: + sorter = get_independent_sorter(packages) + + attr_packages = {package["attrPath"]: package for package in packages} + eprint() - eprint('Going to be running update for following packages:') + eprint("Going to be running update for following packages:") for package in packages: eprint(f" - {package['name']}") eprint() - confirm = '' if skip_prompt else input('Press Enter key to continue...') + confirm = "" if skip_prompt else input("Press Enter key to continue...") - if confirm == '': + if confirm == "": eprint() - eprint('Running update for:') + eprint("Running update for:") - asyncio.run(start_updates(max_workers, keep_going, commit, packages)) + await start_updates(max_workers, keep_going, commit, attr_packages, sorter) eprint() - eprint('Packages updated!') + eprint("Packages updated!") sys.exit() else: - eprint('Aborting!') + eprint("Aborting!") sys.exit(130) -parser = argparse.ArgumentParser(description='Update packages') -parser.add_argument('--max-workers', '-j', dest='max_workers', type=int, help='Number of updates to run concurrently', nargs='?', default=4) -parser.add_argument('--keep-going', '-k', dest='keep_going', action='store_true', help='Do not stop after first failure') -parser.add_argument('--commit', '-c', dest='commit', action='store_true', help='Commit the changes') -parser.add_argument('packages', help='JSON file containing the list of package names and their update scripts') -parser.add_argument('--skip-prompt', '-s', dest='skip_prompt', action='store_true', help='Do not stop for prompts') -if __name__ == '__main__': +parser = argparse.ArgumentParser(description="Update packages") +parser.add_argument( + "--max-workers", + "-j", + dest="max_workers", + type=int, + help="Number of updates to run concurrently", + nargs="?", + default=4, +) +parser.add_argument( + "--keep-going", + "-k", + dest="keep_going", + action="store_true", + help="Do not stop after first failure", +) +parser.add_argument( + "--commit", + "-c", + dest="commit", + action="store_true", + help="Commit the changes", +) +parser.add_argument( + "--order", + dest="order", + default="arbitrary", + choices=["arbitrary", "reverse-topological", "topological"], + help="Sort the packages based on dependency relation", +) +parser.add_argument( + "packages", + help="JSON file containing the list of package names and their update scripts", +) +parser.add_argument( + "--skip-prompt", + "-s", + dest="skip_prompt", + action="store_true", + help="Do not stop for prompts", +) + +if __name__ == "__main__": args = parser.parse_args() try: - main(args.max_workers, args.keep_going, args.commit, args.packages, args.skip_prompt) + asyncio.run( + main( + args.max_workers, + args.keep_going, + args.commit, + args.packages, + args.skip_prompt, + args.order, + ) + ) except KeyboardInterrupt as e: # Let’s cancel outside of the main loop too. sys.exit(130) diff --git a/pkgs/common-updater/combinators.nix b/pkgs/common-updater/combinators.nix index c4754848be87..eafc530ceaed 100644 --- a/pkgs/common-updater/combinators.nix +++ b/pkgs/common-updater/combinators.nix @@ -169,18 +169,21 @@ rec { assert lib.assertMsg (lib.all validateFeatures scripts) "Combining update scripts with features enabled (other than “silent” scripts and an optional single script with “commit”) is currently unsupported."; + assert lib.assertMsg ( builtins.length ( lib.unique ( - builtins.map ( - { - attrPath ? null, - ... - }: - attrPath - ) scripts + builtins.filter (attrPath: attrPath != null) ( + builtins.map ( + { + attrPath ? null, + ... + }: + attrPath + ) scripts + ) ) - ) == 1 + ) <= 1 ) "Combining update scripts with different attr paths is currently unsupported."; { diff --git a/pkgs/desktops/gnome/update.nix b/pkgs/desktops/gnome/update.nix index d6b92b0e77cc..2500739a22db 100644 --- a/pkgs/desktops/gnome/update.nix +++ b/pkgs/desktops/gnome/update.nix @@ -108,4 +108,5 @@ in supportedFeatures = [ "commit" ]; + inherit attrPath; }