From 9c864691b1b8b8b3c17eb05dfdf493e562258338 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olle=20Hyn=C3=A9n=20Ulfsj=C3=B6=C3=B6?= Date: Tue, 21 May 2024 15:09:26 +0200 Subject: [PATCH 1/2] Add bazel bin --- refresh.template.py | 40 +++++++++++++++++++++--------------- refresh_compile_commands.bzl | 2 ++ 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 3b33701..ff662b5 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -95,7 +95,7 @@ def _print_header_finding_warning_once(): _print_header_finding_warning_once.has_logged = False -@functools.lru_cache(maxsize=None) +@functools.lru_cache(bazel_binary, maxsize=None) def _get_bazel_version(): """Gets the Bazel version as a tuple of (major, minor, patch). @@ -104,7 +104,7 @@ def _get_bazel_version(): If the version can't be determined, returns (0, 0, 0). """ bazel_version_process = subprocess.run( - ['bazel', 'version'], + [bazel_binary, 'version'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -127,7 +127,7 @@ def _get_bazel_version(): @functools.lru_cache(maxsize=None) -def _get_bazel_cached_action_keys(): +def _get_bazel_cached_action_keys(bazel_binary): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( ['bazel', 'dump', '--action_cache'], @@ -232,7 +232,7 @@ def _is_nvcc(path: str): return os.path.basename(path).startswith('nvcc') -def _get_headers_gcc(compile_action, source_path: str, action_key: str): +def _get_headers_gcc(compile_action, source_path: str, action_key: str, bazel_binary:str): """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.) Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache. @@ -240,7 +240,7 @@ def _get_headers_gcc(compile_action, source_path: str, action_key: str): # Flags reference here: https://clang.llvm.org/docs/ClangCommandLineReference.html # Check to see if Bazel has an (approximately) fresh cache of the included headers, and if so, use them to avoid a slow preprocessing step. - if action_key in _get_bazel_cached_action_keys(): # Safe because Bazel only holds one cached action key per path, and the key contains the path. + if action_key in _get_bazel_cached_action_keys(bazel_binary): # Safe because Bazel only holds one cached action key per path, and the key contains the path. for i, arg in enumerate(compile_action.arguments): if arg.startswith('-MF'): if len(arg) > 3: # Either appended, like -MF @@ -517,7 +517,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str): return True -def _get_headers(compile_action, source_path: str): +def _get_headers(compile_action, source_path: str, bazel_binary: str): """Gets the headers used by a particular compile command. Relatively slow. Requires running the C preprocessor. @@ -588,7 +588,7 @@ def _get_headers(compile_action, source_path: str): if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe headers, should_cache = _get_headers_msvc(compile_action, source_path) else: - headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey) + headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey, bazel_binary) # Cache for future use if output_file and should_cache: @@ -610,7 +610,7 @@ def _get_headers(compile_action, source_path: str): _get_headers.has_logged = False -def _get_files(compile_action): +def _get_files(compile_action, bazel_binary): """Gets the ({source files}, {header files}) clangd should be told the command applies to.""" # Getting the source file is a little trickier than it might seem. @@ -670,7 +670,7 @@ def _get_files(compile_action): if os.path.splitext(source_file)[1] in _get_files.assembly_source_extensions: return {source_file}, set() - header_files = _get_headers(compile_action, source_file) + header_files = _get_headers(compile_action, source_file, bazel_binary) # Ambiguous .h headers need a language specified if they aren't C, or clangd sometimes makes mistakes # Delete this and unused extension variables when clangd >= 16 is released, since their underlying issues are resolved at HEAD @@ -1097,7 +1097,7 @@ def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]: } -def _get_cpp_command_for_files(compile_action): +def _get_cpp_command_for_files(compile_action, bazel_binary: str): """Reformat compile_action into a compile command clangd can understand. Undo Bazel-isms and figures out which files clangd should apply the command to. @@ -1113,7 +1113,7 @@ def _get_cpp_command_for_files(compile_action): # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed. compile_action.arguments = _all_platform_patch(compile_action.arguments) - source_files, header_files = _get_files(compile_action) + source_files, header_files = _get_files(compile_action, bazel_binary) # Done after getting files since we may execute NVCC to get the files. compile_action.arguments = _nvcc_patch(compile_action.arguments) @@ -1121,7 +1121,7 @@ def _get_cpp_command_for_files(compile_action): return source_files, header_files, compile_action.arguments -def _convert_compile_commands(aquery_output): +def _convert_compile_commands(aquery_output, bazel_binary: str): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) and CUDA compile actions for a given build. @@ -1145,7 +1145,7 @@ def _convert_compile_commands(aquery_output): with concurrent.futures.ThreadPoolExecutor( max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: - outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) + outputs = threadpool.map(_get_cpp_command_for_files, args=(aquery_output.actions, bazel_binary)) # Yield as compile_commands.json entries header_files_already_written = set() @@ -1171,7 +1171,7 @@ def _convert_compile_commands(aquery_output): } -def _get_commands(target: str, flags: str): +def _get_commands(target: str, flags: str, bazel_binary: str): """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -1227,7 +1227,7 @@ def _get_commands(target: str, flags: str): '--features=-layering_check', ] - if _get_bazel_version() >= (6, 1, 0): + if _get_bazel_version(bazel_binary) >= (6, 1, 0): aquery_args += ['--host_features=-compiler_param_file', '--host_features=-layering_check'] aquery_args += additional_flags @@ -1269,7 +1269,7 @@ def _get_commands(target: str, flags: str): Continuing gracefully...""") return - yield from _convert_compile_commands(parsed_aquery_output) + yield from _convert_compile_commands(parsed_aquery_output, bazel_binary) # Log clear completion messages @@ -1405,9 +1405,15 @@ def main(): # End: template filled by Bazel ] + bazel_binary = ( + # Begin: template filled by Bazel + {bazel_binary} + # End: template filled by Bazel + ) + compile_command_entries = [] for (target, flags) in target_flag_pairs: - compile_command_entries.extend(_get_commands(target, flags)) + compile_command_entries.extend(_get_commands(target, flags, bazel_binary)) if not compile_command_entries: log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 0210d42..cf7903e 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -115,12 +115,14 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), + "{bazel_binary}": repr(ctx.attr.bazel_binary), }, ) return DefaultInfo(files = depset([script])) _expand_template = rule( attrs = { + "bazel_binary": attr.string(default = 'bazel'), "labels_to_flags": attr.string_dict(mandatory = True), # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes. "exclude_external_sources": attr.bool(default = False), "exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0 From 558b08e3376907554ab76d9ce7536e0f28e9f631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olle=20Hyn=C3=A9n=20Ulfsj=C3=B6=C3=B6?= Date: Wed, 22 May 2024 09:38:45 +0200 Subject: [PATCH 2/2] Bazel binary --- README.md | 3 +++ refresh.template.py | 45 +++++++++++++++++++++--------------- refresh_compile_commands.bzl | 5 +++- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index dcf07f0..dd4f1aa 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,9 @@ load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile refresh_compile_commands( name = "refresh_compile_commands", + # Change the name of the bazel executable (defaults to bazel) + # bazel_binary = "./bazel_wrapper" + # Specify the targets of interest. # For example, specify a dict of targets and any flags required to build. targets = { diff --git a/refresh.template.py b/refresh.template.py index 3b33701..f20f894 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -96,7 +96,7 @@ def _print_header_finding_warning_once(): @functools.lru_cache(maxsize=None) -def _get_bazel_version(): +def _get_bazel_version(bazel_binary): """Gets the Bazel version as a tuple of (major, minor, patch). The rolling release and the release candidate are treated as the LTS release. @@ -104,7 +104,7 @@ def _get_bazel_version(): If the version can't be determined, returns (0, 0, 0). """ bazel_version_process = subprocess.run( - ['bazel', 'version'], + [bazel_binary, 'version'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -127,10 +127,10 @@ def _get_bazel_version(): @functools.lru_cache(maxsize=None) -def _get_bazel_cached_action_keys(): +def _get_bazel_cached_action_keys(bazel_binary): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( - ['bazel', 'dump', '--action_cache'], + [bazel_binary, 'dump', '--action_cache'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -232,7 +232,7 @@ def _is_nvcc(path: str): return os.path.basename(path).startswith('nvcc') -def _get_headers_gcc(compile_action, source_path: str, action_key: str): +def _get_headers_gcc(compile_action, source_path: str, action_key: str, bazel_binary: str): """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.) Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache. @@ -240,7 +240,7 @@ def _get_headers_gcc(compile_action, source_path: str, action_key: str): # Flags reference here: https://clang.llvm.org/docs/ClangCommandLineReference.html # Check to see if Bazel has an (approximately) fresh cache of the included headers, and if so, use them to avoid a slow preprocessing step. - if action_key in _get_bazel_cached_action_keys(): # Safe because Bazel only holds one cached action key per path, and the key contains the path. + if action_key in _get_bazel_cached_action_keys(bazel_binary): # Safe because Bazel only holds one cached action key per path, and the key contains the path. for i, arg in enumerate(compile_action.arguments): if arg.startswith('-MF'): if len(arg) > 3: # Either appended, like -MF @@ -517,7 +517,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str): return True -def _get_headers(compile_action, source_path: str): +def _get_headers(compile_action, source_path: str, bazel_binary: str): """Gets the headers used by a particular compile command. Relatively slow. Requires running the C preprocessor. @@ -588,7 +588,7 @@ def _get_headers(compile_action, source_path: str): if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe headers, should_cache = _get_headers_msvc(compile_action, source_path) else: - headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey) + headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey, bazel_binary) # Cache for future use if output_file and should_cache: @@ -610,7 +610,7 @@ def _get_headers(compile_action, source_path: str): _get_headers.has_logged = False -def _get_files(compile_action): +def _get_files(compile_action, bazel_binary): """Gets the ({source files}, {header files}) clangd should be told the command applies to.""" # Getting the source file is a little trickier than it might seem. @@ -670,7 +670,7 @@ def _get_files(compile_action): if os.path.splitext(source_file)[1] in _get_files.assembly_source_extensions: return {source_file}, set() - header_files = _get_headers(compile_action, source_file) + header_files = _get_headers(compile_action, source_file, bazel_binary) # Ambiguous .h headers need a language specified if they aren't C, or clangd sometimes makes mistakes # Delete this and unused extension variables when clangd >= 16 is released, since their underlying issues are resolved at HEAD @@ -1097,11 +1097,12 @@ def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]: } -def _get_cpp_command_for_files(compile_action): +def _get_cpp_command_for_files(compile_action, bazel_binary: str): """Reformat compile_action into a compile command clangd can understand. Undo Bazel-isms and figures out which files clangd should apply the command to. """ + # Condense aquery's environment variables into a dictionary, the format you might expect. compile_action.environmentVariables = {pair.key: pair.value for pair in getattr(compile_action, 'environmentVariables', [])} if 'PATH' not in compile_action.environmentVariables: # Bazel only adds if --incompatible_strict_action_env is passed--and otherwise inherits. @@ -1113,7 +1114,7 @@ def _get_cpp_command_for_files(compile_action): # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed. compile_action.arguments = _all_platform_patch(compile_action.arguments) - source_files, header_files = _get_files(compile_action) + source_files, header_files = _get_files(compile_action, bazel_binary) # Done after getting files since we may execute NVCC to get the files. compile_action.arguments = _nvcc_patch(compile_action.arguments) @@ -1121,7 +1122,7 @@ def _get_cpp_command_for_files(compile_action): return source_files, header_files, compile_action.arguments -def _convert_compile_commands(aquery_output): +def _convert_compile_commands(aquery_output, bazel_binary: str): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) and CUDA compile actions for a given build. @@ -1145,7 +1146,7 @@ def _convert_compile_commands(aquery_output): with concurrent.futures.ThreadPoolExecutor( max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: - outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) + outputs = threadpool.map(lambda p: _get_cpp_command_for_files(*p), [(action, bazel_binary) for action in aquery_output.actions]) # Yield as compile_commands.json entries header_files_already_written = set() @@ -1171,7 +1172,7 @@ def _convert_compile_commands(aquery_output): } -def _get_commands(target: str, flags: str): +def _get_commands(target: str, flags: str, bazel_binary: str): """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -1200,7 +1201,7 @@ def _get_commands(target: str, flags: str): # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" aquery_args = [ - 'bazel', + bazel_binary, 'aquery', # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto @@ -1227,7 +1228,7 @@ def _get_commands(target: str, flags: str): '--features=-layering_check', ] - if _get_bazel_version() >= (6, 1, 0): + if _get_bazel_version(bazel_binary) >= (6, 1, 0): aquery_args += ['--host_features=-compiler_param_file', '--host_features=-layering_check'] aquery_args += additional_flags @@ -1269,7 +1270,7 @@ def _get_commands(target: str, flags: str): Continuing gracefully...""") return - yield from _convert_compile_commands(parsed_aquery_output) + yield from _convert_compile_commands(parsed_aquery_output, bazel_binary) # Log clear completion messages @@ -1405,9 +1406,15 @@ def main(): # End: template filled by Bazel ] + bazel_binary = ( + # Begin: template filled by Bazel + {bazel_binary} + # End: template filled by Bazel + ) + compile_command_entries = [] for (target, flags) in target_flag_pairs: - compile_command_entries.extend(_get_commands(target, flags)) + compile_command_entries.extend(_get_commands(target, flags, bazel_binary)) if not compile_command_entries: log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 0210d42..768819a 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,6 +64,7 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, + bazel_binary = 'bazel', **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. @@ -89,7 +90,7 @@ def refresh_compile_commands( # Generate the core, runnable python script from refresh.template.py script_name = name + ".py" - _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs) + _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, bazel_binary = bazel_binary, **kwargs) # Combine them so the wrapper calls the main script native.py_binary( @@ -115,12 +116,14 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), + "{bazel_binary}": repr(ctx.attr.bazel_binary), }, ) return DefaultInfo(files = depset([script])) _expand_template = rule( attrs = { + "bazel_binary": attr.string(default = 'bazel'), "labels_to_flags": attr.string_dict(mandatory = True), # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes. "exclude_external_sources": attr.bool(default = False), "exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0