generator: Run all gir processes in parallel

Since the addition of doc regeneration - which also spawns a gir process
for every non-sys crate - the process is now incredibly slow and not
well suited for iterative development:

    ./generator.py --no-fmt  26.25s user 0.79s system 99% cpu 27.044 total

All gir processes are currently run serially (the generator waits for
one to complete before spawning the next process) even though there are
no inter-dependencies.  Simply spawning all processes at once and
collecting their results + printing them in order after everything has
been spawned yields a significant speedup:

    ./generator.py --no-fmt  37.99s user 0.88s system 3285% cpu 1.183 total

Note: this is on a 32-core Threadripper.  The improvement is more modest
on machines with fewer cores, and also depends on IO speed.  A 4-core i5,
before and after:

    ./generator.py --no-fmt  30.24s user 0.76s system 99% cpu 31.055 total
    ./generator.py --no-fmt  57.78s user 0.88s system 763% cpu 7.685 total

That's still a sizable gain for simply not blocking on other tasks
anymore.
This commit is contained in:
Marijn Suijten 2021-04-11 19:52:22 +02:00 committed by Sebastian Dröge
parent a65d2df25c
commit 1c18a54177

View file

@ -26,6 +26,10 @@ def run_command(command, folder=None):
return True
def spawn_process(command):
    """Start `command` as a child process without waiting for it to finish.

    Both stdout and stderr of the child are captured through pipes, so the
    caller is expected to collect them afterwards (for example with
    `communicate()`), which also waits for the process to exit.

    Args:
        command: The program and its arguments, as a sequence.

    Returns:
        The `subprocess.Popen` handle of the started process.
    """
    return subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
def update_workspace():
    """Run `cargo build --release` via `run_command`, passing "gir" as the folder.

    Returns:
        Whatever `run_command` returns for that invocation.
    """
    return run_command(["cargo", "build", "--release"], "gir")
@ -73,13 +77,11 @@ def build_gir_if_needed(updated_submodule):
def regen_crates(path, conf):
processes = []
if path.is_dir():
for entry in path.rglob("Gir*.toml"):
if not regen_crates(entry, conf):
return False
processes += regen_crates(entry, conf)
elif path.match("Gir*.toml"):
print('==> Regenerating "{}"...'.format(path))
args = [
conf.gir_path,
"-c",
@ -87,7 +89,7 @@ def regen_crates(path, conf):
"-o",
path.parent,
] + [d for path in conf.gir_files_paths for d in ("-d", path)]
error = False
if path.parent.name.endswith("sys"):
args.extend(["-m", "sys"])
else:
@ -100,28 +102,27 @@ def regen_crates(path, conf):
.joinpath(path.parent)
.joinpath("docs.md")
)
print("==> Docs into {}".format(doc_path))
doc_args = args + [
"-m",
"doc",
"--doc-target-path",
doc_path,
]
error |= not run_command(doc_args)
processes.append(
(
"Regenerating documentation for `{}` into `{}`...".format(
path, doc_path
),
spawn_process(doc_args),
)
)
processes.append(("Regenerating `{}`...".format(path), spawn_process(args)))
try:
error |= not run_command(args)
except Exception as err:
print("The following error occurred: {}".format(err))
error = True
if error:
if not ask_yes_no_question("Do you want to continue?", conf):
return False
print("<== Done!")
else:
print("==> {} is not a valid Gir*.toml file".format(path))
return False
return True
raise Exception("`{}` is not a valid Gir*.toml file".format(path))
return processes
def valid_path(path):
@ -196,8 +197,15 @@ def main():
print("=> Regenerating crates...")
for path in conf.path:
print("=> Looking in path `{}`".format(path))
if not regen_crates(path, conf):
return 1
processes = regen_crates(path, conf)
for log, p in processes:
print("==> {}".format(log))
stdout, stderr = p.communicate()
# Gir doesn't print anything to stdout. If it does, this is likely out of
# order with stderr, unless the printer/logging flushes in between.
assert stdout == b""
print(stderr.decode("utf-8"), end="")
if not conf.no_fmt and not run_command(["cargo", "fmt"]):
return 1
print("<= Done!")