From 1c18a54177c588d145abf22c3d3b754f5539cdb4 Mon Sep 17 00:00:00 2001 From: Marijn Suijten Date: Sun, 11 Apr 2021 19:52:22 +0200 Subject: [PATCH] generator: Run all gir processes in parallel Since the addition of doc regeneration - which also spawns a gir process for every non-sys crate - the process is now incredibly slow and not well suited for iterative development: ./generator.py --no-fmt 26.25s user 0.79s system 99% cpu 27.044 total All gir processes are currently run in serial (the generator waits for one to complete before spawning the next process) even though there are no inter-dependencies. Simply spawning all processes at once and collecting their results + printing them in order after everything has been spawned yields a significant speedup: ./generator.py --no-fmt 37.99s user 0.88s system 3285% cpu 1.183 total Note: this is on a 32-core Threadripper. The improvement is more modest on machines with fewer cores, and also depends on IO speed. A 4-core i5, before and after: ./generator.py --no-fmt 30.24s user 0.76s system 99% cpu 31.055 total ./generator.py --no-fmt 57.78s user 0.88s system 763% cpu 7.685 total That's still a sizable gain for simply not blocking on other tasks anymore. 
--- generator.py | 50 +++++++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/generator.py b/generator.py index ca270153b..0605e05f4 100755 --- a/generator.py +++ b/generator.py @@ -26,6 +26,10 @@ def run_command(command, folder=None): return True +def spawn_process(command): + return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def update_workspace(): return run_command(["cargo", "build", "--release"], "gir") @@ -73,13 +77,11 @@ def build_gir_if_needed(updated_submodule): def regen_crates(path, conf): + processes = [] if path.is_dir(): for entry in path.rglob("Gir*.toml"): - if not regen_crates(entry, conf): - return False + processes += regen_crates(entry, conf) elif path.match("Gir*.toml"): - print('==> Regenerating "{}"...'.format(path)) - args = [ conf.gir_path, "-c", @@ -87,7 +89,7 @@ def regen_crates(path, conf): "-o", path.parent, ] + [d for path in conf.gir_files_paths for d in ("-d", path)] - error = False + if path.parent.name.endswith("sys"): args.extend(["-m", "sys"]) else: @@ -100,28 +102,27 @@ def regen_crates(path, conf): .joinpath(path.parent) .joinpath("docs.md") ) - print("==> Docs into {}".format(doc_path)) doc_args = args + [ "-m", "doc", "--doc-target-path", doc_path, ] - error |= not run_command(doc_args) + processes.append( + ( + "Regenerating documentation for `{}` into `{}`...".format( + path, doc_path + ), + spawn_process(doc_args), + ) + ) + + processes.append(("Regenerating `{}`...".format(path), spawn_process(args))) - try: - error |= not run_command(args) - except Exception as err: - print("The following error occurred: {}".format(err)) - error = True - if error: - if not ask_yes_no_question("Do you want to continue?", conf): - return False - print("<== Done!") else: - print("==> {} is not a valid Gir*.toml file".format(path)) - return False - return True + raise Exception("`{}` is not a valid Gir*.toml file".format(path)) + + return processes 
def valid_path(path): @@ -196,8 +197,15 @@ def main(): print("=> Regenerating crates...") for path in conf.path: print("=> Looking in path `{}`".format(path)) - if not regen_crates(path, conf): - return 1 + processes = regen_crates(path, conf) + for log, p in processes: + print("==> {}".format(log)) + stdout, stderr = p.communicate() + # Gir doesn't print anything to stdout. If it does, this is likely out of + # order with stderr, unless the printer/logging flushes in between. + assert stdout == b"" + print(stderr.decode("utf-8"), end="") + if not conf.no_fmt and not run_command(["cargo", "fmt"]): return 1 print("<= Done!")