Skip to content

Explicit outputs for wasm_cc_binary #1047

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 21 additions & 33 deletions bazel/emscripten_toolchain/wasm_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,9 @@
This script will take a tar archive containing the output of the emscripten
toolchain. This file contains any output files produced by a wasm_cc_binary or a
cc_binary built with --config=wasm. The files are extracted into the given
output path.
output paths.

The name of archive is expected to be of the format `foo` or `foo.XXX` and
the contents are expected to be foo.js and foo.wasm.

Several optional files may also be in the archive, including but not limited to
foo.js.mem, pthread-main.js, and foo.wasm.map.

If the file is not a tar archive, the passed file will simply be copied to its
destination.
The contents of the archive are expected to match the given outputs extnames.

This script and its accompanying Bazel rule should allow you to extract a
WebAssembly binary into a larger web application.
Expand All @@ -29,39 +22,34 @@ def ensure(f):
pass


def check(f):
if not os.path.exists(f):
raise Exception('Expected file in archive: %s' % f)


def main():
parser = argparse.ArgumentParser()
parser.add_argument('--archive', help='The archive to extract from.')
parser.add_argument('--output_path', help='The path to extract into.')
parser.add_argument('--outputs', help='Comma separated list of files that should be extracted from the archive. Only the extname has to match a file in the archive.')
parser.add_argument('--allow_empty_outputs', help='If an output listed in --outputs does not exist, create it anyways.', action='store_true')
args = parser.parse_args()

args.archive = os.path.normpath(args.archive)
args.outputs = args.outputs.split(",")

basename = os.path.basename(args.archive)
stem = basename.split('.')[0]

# Extract all files from the tarball.
tar = tarfile.open(args.archive)
tar.extractall(args.output_path)

# At least one of these two files should exist at this point.
ensure(os.path.join(args.output_path, stem + '.js'))
ensure(os.path.join(args.output_path, stem + '.wasm'))

# And can optionally contain these extra files.
ensure(os.path.join(args.output_path, stem + '.wasm.map'))
ensure(os.path.join(args.output_path, stem + '.worker.js'))
ensure(os.path.join(args.output_path, stem + '.js.mem'))
ensure(os.path.join(args.output_path, stem + '.data'))
ensure(os.path.join(args.output_path, stem + '.fetch.js'))
ensure(os.path.join(args.output_path, stem + '.js.symbols'))
ensure(os.path.join(args.output_path, stem + '.wasm.debug.wasm'))
ensure(os.path.join(args.output_path, stem + '.html'))
for member in tar.getmembers():
extname = '.' + member.name.split('.', 1)[1]
for idx, output in enumerate(args.outputs):
if output.endswith(extname):
member_file = tar.extractfile(member)
with open(output, "wb") as output_file:
output_file.write(member_file.read())
args.outputs.pop(idx)
break

for output in args.outputs:
extname = '.' + output.split('.', 1)[1]
if args.allow_empty_outputs:
ensure(output)
else:
print("[ERROR] Archive does not contain file with extname: %s" % extname)


if __name__ == '__main__':
Expand Down
140 changes: 104 additions & 36 deletions bazel/emscripten_toolchain/wasm_cc_binary.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,81 @@ _wasm_transition = transition(
],
)

def _wasm_binary_impl(ctx):
_ALLOW_OUTPUT_EXTNAMES = [
".js",
".wasm",
".wasm.map",
".worker.js",
".js.mem",
".data",
".fetch.js",
".js.symbols",
".wasm.debug.wasm",
".html",
]

_WASM_BINARY_COMMON_ATTRS = {
"backend": attr.string(
default = "_default",
values = ["_default", "emscripten", "llvm"],
),
"cc_target": attr.label(
cfg = _wasm_transition,
mandatory = True,
),
"exit_runtime": attr.bool(
default = False,
),
"threads": attr.string(
default = "_default",
values = ["_default", "emscripten", "off"],
),
"simd": attr.bool(
default = False,
),
"_allowlist_function_transition": attr.label(
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
),
"_wasm_binary_extractor": attr.label(
executable = True,
allow_files = True,
cfg = "exec",
default = Label("@emsdk//emscripten_toolchain:wasm_binary"),
),
}

def _wasm_cc_binary_impl(ctx):
args = ctx.actions.args()
args.add("--output_path", ctx.outputs.loader.dirname)
cc_target = ctx.attr.cc_target[0]

for output in ctx.outputs.outputs:
valid_extname = False
for allowed_extname in _ALLOW_OUTPUT_EXTNAMES:
if output.path.endswith(allowed_extname):
valid_extname = True
break
if not valid_extname:
fail("Invalid output '{}'. Allowed extnames: {}".format(output.basename, ", ".join(_ALLOW_OUTPUT_EXTNAMES)))

args.add_all("--archive", ctx.files.cc_target)
args.add_joined("--outputs", ctx.outputs.outputs, join_with = ",")

ctx.actions.run(
inputs = ctx.files.cc_target,
outputs = ctx.outputs.outputs,
arguments = [args],
executable = ctx.executable._wasm_binary_extractor,
)

return DefaultInfo(
files = depset(ctx.outputs.outputs),
# This is needed since rules like web_test usually have a data
# dependency on this target.
data_runfiles = ctx.runfiles(transitive_files = depset(ctx.outputs.outputs)),
)

def _wasm_cc_binary_legacy_impl(ctx):
cc_target = ctx.attr.cc_target[0]
outputs = [
ctx.outputs.loader,
ctx.outputs.wasm,
Expand All @@ -72,6 +142,11 @@ def _wasm_binary_impl(ctx):
ctx.outputs.html,
]

args = ctx.actions.args()
args.add("--allow_empty_outputs")
args.add_all("--archive", ctx.files.cc_target)
args.add_joined("--outputs", outputs, join_with = ",")

ctx.actions.run(
inputs = ctx.files.cc_target,
outputs = outputs,
Expand All @@ -87,7 +162,19 @@ def _wasm_binary_impl(ctx):
data_runfiles = ctx.runfiles(transitive_files = depset(outputs)),
)

def _wasm_binary_outputs(name, cc_target):
_wasm_cc_binary = rule(
name = "wasm_cc_binary",
implementation = _wasm_cc_binary_impl,
attrs = dict(
_WASM_BINARY_COMMON_ATTRS,
outputs = attr.output_list(
allow_empty = False,
mandatory = True,
),
),
)

def _wasm_binary_legacy_outputs(name, cc_target):
basename = cc_target.name
basename = basename.split(".")[0]
outputs = {
Expand All @@ -105,6 +192,13 @@ def _wasm_binary_outputs(name, cc_target):

return outputs

_wasm_cc_binary_legacy = rule(
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you set name = "wasm_cc_binary" for both of these?

that way the target type matches for things that care about the rule type.

Copy link
Contributor Author

@zaucy zaucy May 17, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! I didn't know that existed. Thanks!

I read the doc about it, but I have two questions if you don't mind answering:

  1. Is it not a problem that both _wasm_cc_binary_legacy and _wasm_cc_binary have the same name?
  2. Is it recommended that the rule name attribute is set for any rule that is wrapped in this style? I'd like to update my internal rules if that's the case. I'm unsure what the consequences of not doing it is.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. This is basically the only use for the name parameter AFAIK--when you need to do something spicy like this.
    See also: cc_test.bzl's usage

  2. I dunno if it's "recommended" or not, but when you care about the rule type being the same (as, in this case, we do -- for things like bazel query "kind(wasm_cc_binary...)" or native.existing_rules), you have two options:
    a) either use name or
    b) do some file import dance like cc_binary does. cc_binary does this thing where it defines the rules in separate files so they can both be named cc_binary, but then imports them with a different name to do this macro wrap.

I personally prefer using name, but I suppose both are correct so it might just be a matter of style.

name = "wasm_cc_binary",
implementation = _wasm_cc_binary_legacy_impl,
attrs = _WASM_BINARY_COMMON_ATTRS,
outputs = _wasm_binary_legacy_outputs,
)

# Wraps a C++ Blaze target, extracting the appropriate files.
#
# This rule will transition to the emscripten toolchain in order
Expand All @@ -113,36 +207,10 @@ def _wasm_binary_outputs(name, cc_target):
# Args:
# name: The name of the rule.
# cc_target: The cc_binary or cc_library to extract files from.
wasm_cc_binary = rule(
implementation = _wasm_binary_impl,
attrs = {
"backend": attr.string(
default = "_default",
values = ["_default", "emscripten", "llvm"],
),
"cc_target": attr.label(
cfg = _wasm_transition,
mandatory = True,
),
"exit_runtime": attr.bool(
default = False,
),
"threads": attr.string(
default = "_default",
values = ["_default", "emscripten", "off"],
),
"simd": attr.bool(
default = False,
),
"_allowlist_function_transition": attr.label(
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
),
"_wasm_binary_extractor": attr.label(
executable = True,
allow_files = True,
cfg = "exec",
default = Label("@emsdk//emscripten_toolchain:wasm_binary"),
),
},
outputs = _wasm_binary_outputs,
)
def wasm_cc_binary(outputs = None, **kwargs):
# for backwards compatibility if no outputs are set the deprecated
# implementation is used.
if not outputs:
_wasm_cc_binary_legacy(**kwargs)
else:
_wasm_cc_binary(outputs = outputs, **kwargs)
4 changes: 4 additions & 0 deletions bazel/test_external/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ cc_binary(
wasm_cc_binary(
name = "hello-world-wasm",
cc_target = ":hello-world",
outputs = [
"hello-world.js",
"hello-world.wasm",
],
)

BASE_LINKOPTS = [
Expand Down