From 9c864691b1b8b8b3c17eb05dfdf493e562258338 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Olle=20Hyn=C3=A9n=20Ulfsj=C3=B6=C3=B6?=
 <olle.hynenulfsjoo@niradynamics.se>
Date: Tue, 21 May 2024 15:09:26 +0200
Subject: [PATCH 1/2] Add bazel bin

---
 refresh.template.py          | 40 +++++++++++++++++++++---------------
 refresh_compile_commands.bzl |  2 ++
 2 files changed, 25 insertions(+), 17 deletions(-)
diff --git a/refresh.template.py b/refresh.template.py
index 3b33701..ff662b5 100644
--- a/refresh.template.py
+++ b/refresh.template.py
@@ -95,7 +95,7 @@ def _print_header_finding_warning_once():
 _print_header_finding_warning_once.has_logged = False
 
 
-@functools.lru_cache(maxsize=None)
+@functools.lru_cache(bazel_binary, maxsize=None)
 def _get_bazel_version():
     """Gets the Bazel version as a tuple of (major, minor, patch).
 
@@ -104,7 +104,7 @@ def _get_bazel_version():
     If the version can't be determined, returns (0, 0, 0).
     """
     bazel_version_process = subprocess.run(
-        ['bazel', 'version'],
+        [bazel_binary, 'version'],
         # MIN_PY=3.7: Replace PIPEs with capture_output.
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
@@ -127,7 +127,7 @@ def _get_bazel_version():
 
 
 @functools.lru_cache(maxsize=None)
-def _get_bazel_cached_action_keys():
+def _get_bazel_cached_action_keys(bazel_binary):
     """Gets the set of actionKeys cached in bazel-out."""
     action_cache_process = subprocess.run(
         ['bazel', 'dump', '--action_cache'],
@@ -232,7 +232,7 @@ def _is_nvcc(path: str):
     return os.path.basename(path).startswith('nvcc')
 
 
-def _get_headers_gcc(compile_action, source_path: str, action_key: str):
+def _get_headers_gcc(compile_action, source_path: str, action_key: str, bazel_binary:str):
     """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.)
 
     Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache.
@@ -240,7 +240,7 @@ def _get_headers_gcc(compile_action, source_path: str, action_key: str):
     # Flags reference here: https://clang.llvm.org/docs/ClangCommandLineReference.html
 
     # Check to see if Bazel has an (approximately) fresh cache of the included headers, and if so, use them to avoid a slow preprocessing step.
-    if action_key in _get_bazel_cached_action_keys():  # Safe because Bazel only holds one cached action key per path, and the key contains the path.
+    if action_key in _get_bazel_cached_action_keys(bazel_binary):  # Safe because Bazel only holds one cached action key per path, and the key contains the path.
         for i, arg in enumerate(compile_action.arguments):
             if arg.startswith('-MF'):
                 if len(arg) > 3: # Either appended, like -MF<file>
@@ -517,7 +517,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str):
     return True
 
 
-def _get_headers(compile_action, source_path: str):
+def _get_headers(compile_action, source_path: str, bazel_binary: str):
     """Gets the headers used by a particular compile command.
 
     Relatively slow. Requires running the C preprocessor.
@@ -588,7 +588,7 @@ def _get_headers(compile_action, source_path: str):
     if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
         headers, should_cache = _get_headers_msvc(compile_action, source_path)
     else:
-        headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey)
+        headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey, bazel_binary)
 
     # Cache for future use
     if output_file and should_cache:
@@ -610,7 +610,7 @@ def _get_headers(compile_action, source_path: str):
 _get_headers.has_logged = False
 
 
-def _get_files(compile_action):
+def _get_files(compile_action, bazel_binary):
     """Gets the ({source files}, {header files}) clangd should be told the command applies to."""
 
     # Getting the source file is a little trickier than it might seem.
@@ -670,7 +670,7 @@ def _get_files(compile_action):
     if os.path.splitext(source_file)[1] in _get_files.assembly_source_extensions:
         return {source_file}, set()
 
-    header_files = _get_headers(compile_action, source_file)
+    header_files = _get_headers(compile_action, source_file, bazel_binary)
 
     # Ambiguous .h headers need a language specified if they aren't C, or clangd sometimes makes mistakes
     # Delete this and unused extension variables when clangd >= 16 is released, since their underlying issues are resolved at HEAD
@@ -1097,7 +1097,7 @@ def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]:
 }
 
 
-def _get_cpp_command_for_files(compile_action):
+def _get_cpp_command_for_files(compile_action, bazel_binary: str):
     """Reformat compile_action into a compile command clangd can understand.
 
     Undo Bazel-isms and figures out which files clangd should apply the command to.
@@ -1113,7 +1113,7 @@ def _get_cpp_command_for_files(compile_action):
     # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
     compile_action.arguments = _all_platform_patch(compile_action.arguments)
 
-    source_files, header_files = _get_files(compile_action)
+    source_files, header_files = _get_files(compile_action, bazel_binary)
 
     # Done after getting files since we may execute NVCC to get the files.
     compile_action.arguments = _nvcc_patch(compile_action.arguments)
@@ -1121,7 +1121,7 @@ def _get_cpp_command_for_files(compile_action):
     return source_files, header_files, compile_action.arguments
 
 
-def _convert_compile_commands(aquery_output):
+def _convert_compile_commands(aquery_output, bazel_binary: str):
     """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries.
 
     Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) and CUDA compile actions for a given build.
@@ -1145,7 +1145,7 @@ def _convert_compile_commands(aquery_output):
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
     ) as threadpool:
-        outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)
+        outputs = threadpool.map(_get_cpp_command_for_files, args=(aquery_output.actions, bazel_binary))
 
     # Yield as compile_commands.json entries
     header_files_already_written = set()
@@ -1171,7 +1171,7 @@ def _convert_compile_commands(aquery_output):
             }
 
 
-def _get_commands(target: str, flags: str):
+def _get_commands(target: str, flags: str, bazel_binary: str):
     """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors."""
     # Log clear completion messages
     log_info(f">>> Analyzing commands used in {target}")
@@ -1227,7 +1227,7 @@ def _get_commands(target: str, flags: str):
         '--features=-layering_check',
     ]
 
-    if _get_bazel_version() >= (6, 1, 0):
+    if _get_bazel_version(bazel_binary) >= (6, 1, 0):
         aquery_args += ['--host_features=-compiler_param_file', '--host_features=-layering_check']
 
     aquery_args += additional_flags
@@ -1269,7 +1269,7 @@ def _get_commands(target: str, flags: str):
     Continuing gracefully...""")
         return
 
-    yield from _convert_compile_commands(parsed_aquery_output)
+    yield from _convert_compile_commands(parsed_aquery_output, bazel_binary)
 
 
     # Log clear completion messages
@@ -1405,9 +1405,15 @@ def main():
         # End:   template filled by Bazel
     ]
 
+    bazel_binary = (
+        # Begin: template filled by Bazel
+        {bazel_binary}
+        # End:   template filled by Bazel
+        )
+
     compile_command_entries = []
     for (target, flags) in target_flag_pairs:
-        compile_command_entries.extend(_get_commands(target, flags))
+        compile_command_entries.extend(_get_commands(target, flags, bazel_binary))
 
     if not compile_command_entries:
         log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use.
diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl
index 0210d42..cf7903e 100644
--- a/refresh_compile_commands.bzl
+++ b/refresh_compile_commands.bzl
@@ -115,12 +115,14 @@ def _expand_template_impl(ctx):
             "{exclude_headers}": repr(ctx.attr.exclude_headers),
             "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
             "{print_args_executable}": repr(ctx.executable._print_args_executable.path),
+            "{bazel_binary}": repr(ctx.attr.bazel_binary),
         },
     )
     return DefaultInfo(files = depset([script]))
 
 _expand_template = rule(
     attrs = {
+        "bazel_binary": attr.string(default = 'bazel'),
         "labels_to_flags": attr.string_dict(mandatory = True),  # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes.
         "exclude_external_sources": attr.bool(default = False),
         "exclude_headers": attr.string(values = ["all", "external", ""]),  # "" needed only for compatibility with Bazel < 3.6.0

From 558b08e3376907554ab76d9ce7536e0f28e9f631 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Olle=20Hyn=C3=A9n=20Ulfsj=C3=B6=C3=B6?=
 <olle.hynenulfsjoo@niradynamics.se>
Date: Wed, 22 May 2024 09:38:45 +0200
Subject: [PATCH 2/2] Bazel binary

---
 README.md                    |  3 +++
 refresh.template.py          | 45 +++++++++++++++++++++---------------
 refresh_compile_commands.bzl |  5 +++-
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index dcf07f0..dd4f1aa 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,9 @@ load("@hedron_compile_commands//:refresh_compile_commands.bzl", "refresh_compile
 refresh_compile_commands(
     name = "refresh_compile_commands",
 
+    # Change the name of the bazel executable (defaults to bazel)
+    # bazel_binary = "./bazel_wrapper"
+
     # Specify the targets of interest.
     # For example, specify a dict of targets and any flags required to build.
     targets = {
diff --git a/refresh.template.py b/refresh.template.py
index 3b33701..f20f894 100644
--- a/refresh.template.py
+++ b/refresh.template.py
@@ -96,7 +96,7 @@ def _print_header_finding_warning_once():
 
 
 @functools.lru_cache(maxsize=None)
-def _get_bazel_version():
+def _get_bazel_version(bazel_binary):
     """Gets the Bazel version as a tuple of (major, minor, patch).
 
     The rolling release and the release candidate are treated as the LTS release.
@@ -104,7 +104,7 @@ def _get_bazel_version():
     If the version can't be determined, returns (0, 0, 0).
     """
     bazel_version_process = subprocess.run(
-        ['bazel', 'version'],
+        [bazel_binary, 'version'],
         # MIN_PY=3.7: Replace PIPEs with capture_output.
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
@@ -127,10 +127,10 @@ def _get_bazel_version():
 
 
 @functools.lru_cache(maxsize=None)
-def _get_bazel_cached_action_keys():
+def _get_bazel_cached_action_keys(bazel_binary):
     """Gets the set of actionKeys cached in bazel-out."""
     action_cache_process = subprocess.run(
-        ['bazel', 'dump', '--action_cache'],
+        [bazel_binary, 'dump', '--action_cache'],
         # MIN_PY=3.7: Replace PIPEs with capture_output.
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
@@ -232,7 +232,7 @@ def _is_nvcc(path: str):
     return os.path.basename(path).startswith('nvcc')
 
 
-def _get_headers_gcc(compile_action, source_path: str, action_key: str):
+def _get_headers_gcc(compile_action, source_path: str, action_key: str, bazel_binary: str):
     """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.)
 
     Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache.
@@ -240,7 +240,7 @@ def _get_headers_gcc(compile_action, source_path: str, action_key: str):
     # Flags reference here: https://clang.llvm.org/docs/ClangCommandLineReference.html
 
     # Check to see if Bazel has an (approximately) fresh cache of the included headers, and if so, use them to avoid a slow preprocessing step.
-    if action_key in _get_bazel_cached_action_keys():  # Safe because Bazel only holds one cached action key per path, and the key contains the path.
+    if action_key in _get_bazel_cached_action_keys(bazel_binary):  # Safe because Bazel only holds one cached action key per path, and the key contains the path.
         for i, arg in enumerate(compile_action.arguments):
             if arg.startswith('-MF'):
                 if len(arg) > 3: # Either appended, like -MF<file>
@@ -517,7 +517,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str):
     return True
 
 
-def _get_headers(compile_action, source_path: str):
+def _get_headers(compile_action, source_path: str, bazel_binary: str):
     """Gets the headers used by a particular compile command.
 
     Relatively slow. Requires running the C preprocessor.
@@ -588,7 +588,7 @@ def _get_headers(compile_action, source_path: str):
     if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
         headers, should_cache = _get_headers_msvc(compile_action, source_path)
     else:
-        headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey)
+        headers, should_cache = _get_headers_gcc(compile_action, source_path, compile_action.actionKey, bazel_binary)
 
     # Cache for future use
     if output_file and should_cache:
@@ -610,7 +610,7 @@ def _get_headers(compile_action, source_path: str):
 _get_headers.has_logged = False
 
 
-def _get_files(compile_action):
+def _get_files(compile_action, bazel_binary):
     """Gets the ({source files}, {header files}) clangd should be told the command applies to."""
 
     # Getting the source file is a little trickier than it might seem.
@@ -670,7 +670,7 @@ def _get_files(compile_action):
     if os.path.splitext(source_file)[1] in _get_files.assembly_source_extensions:
         return {source_file}, set()
 
-    header_files = _get_headers(compile_action, source_file)
+    header_files = _get_headers(compile_action, source_file, bazel_binary)
 
     # Ambiguous .h headers need a language specified if they aren't C, or clangd sometimes makes mistakes
     # Delete this and unused extension variables when clangd >= 16 is released, since their underlying issues are resolved at HEAD
@@ -1097,11 +1097,12 @@ def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]:
 }
 
 
-def _get_cpp_command_for_files(compile_action):
+def _get_cpp_command_for_files(compile_action, bazel_binary: str):
     """Reformat compile_action into a compile command clangd can understand.
 
     Undo Bazel-isms and figures out which files clangd should apply the command to.
     """
+
     # Condense aquery's environment variables into a dictionary, the format you might expect.
     compile_action.environmentVariables = {pair.key: pair.value for pair in getattr(compile_action, 'environmentVariables', [])}
     if 'PATH' not in compile_action.environmentVariables: # Bazel only adds if --incompatible_strict_action_env is passed--and otherwise inherits.
@@ -1113,7 +1114,7 @@ def _get_cpp_command_for_files(compile_action):
     # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
     compile_action.arguments = _all_platform_patch(compile_action.arguments)
 
-    source_files, header_files = _get_files(compile_action)
+    source_files, header_files = _get_files(compile_action, bazel_binary)
 
     # Done after getting files since we may execute NVCC to get the files.
     compile_action.arguments = _nvcc_patch(compile_action.arguments)
@@ -1121,7 +1122,7 @@ def _get_cpp_command_for_files(compile_action):
     return source_files, header_files, compile_action.arguments
 
 
-def _convert_compile_commands(aquery_output):
+def _convert_compile_commands(aquery_output, bazel_binary: str):
     """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries.
 
     Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) and CUDA compile actions for a given build.
@@ -1145,7 +1146,7 @@ def _convert_compile_commands(aquery_output):
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
     ) as threadpool:
-        outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)
+        outputs = threadpool.map(lambda p: _get_cpp_command_for_files(*p), [(action, bazel_binary) for action in aquery_output.actions])
 
     # Yield as compile_commands.json entries
     header_files_already_written = set()
@@ -1171,7 +1172,7 @@ def _convert_compile_commands(aquery_output):
             }
 
 
-def _get_commands(target: str, flags: str):
+def _get_commands(target: str, flags: str, bazel_binary: str):
     """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors."""
     # Log clear completion messages
     log_info(f">>> Analyzing commands used in {target}")
@@ -1200,7 +1201,7 @@ def _get_commands(target: str, flags: str):
         # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers.
         target_statment = f"filter('^(//|@//)',{target_statment})"
     aquery_args = [
-        'bazel',
+        bazel_binary,
         'aquery',
         # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html
         # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto
@@ -1227,7 +1228,7 @@ def _get_commands(target: str, flags: str):
         '--features=-layering_check',
     ]
 
-    if _get_bazel_version() >= (6, 1, 0):
+    if _get_bazel_version(bazel_binary) >= (6, 1, 0):
         aquery_args += ['--host_features=-compiler_param_file', '--host_features=-layering_check']
 
     aquery_args += additional_flags
@@ -1269,7 +1270,7 @@ def _get_commands(target: str, flags: str):
     Continuing gracefully...""")
         return
 
-    yield from _convert_compile_commands(parsed_aquery_output)
+    yield from _convert_compile_commands(parsed_aquery_output, bazel_binary)
 
 
     # Log clear completion messages
@@ -1405,9 +1406,15 @@ def main():
         # End:   template filled by Bazel
     ]
 
+    bazel_binary = (
+        # Begin: template filled by Bazel
+        {bazel_binary}
+        # End:   template filled by Bazel
+        )
+
     compile_command_entries = []
     for (target, flags) in target_flag_pairs:
-        compile_command_entries.extend(_get_commands(target, flags))
+        compile_command_entries.extend(_get_commands(target, flags, bazel_binary))
 
     if not compile_command_entries:
         log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use.
diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl
index 0210d42..768819a 100644
--- a/refresh_compile_commands.bzl
+++ b/refresh_compile_commands.bzl
@@ -64,6 +64,7 @@ def refresh_compile_commands(
         targets = None,
         exclude_headers = None,
         exclude_external_sources = False,
+        bazel_binary = 'bazel',
         **kwargs):  # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes.
     # Convert the various, acceptable target shorthands into the dictionary format
     # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type.
@@ -89,7 +90,7 @@ def refresh_compile_commands(
 
     # Generate the core, runnable python script from refresh.template.py
     script_name = name + ".py"
-    _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs)
+    _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, bazel_binary = bazel_binary, **kwargs)
 
     # Combine them so the wrapper calls the main script
     native.py_binary(
@@ -115,12 +116,14 @@ def _expand_template_impl(ctx):
             "{exclude_headers}": repr(ctx.attr.exclude_headers),
             "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
             "{print_args_executable}": repr(ctx.executable._print_args_executable.path),
+            "{bazel_binary}": repr(ctx.attr.bazel_binary),
         },
     )
     return DefaultInfo(files = depset([script]))
 
 _expand_template = rule(
     attrs = {
+        "bazel_binary": attr.string(default = 'bazel'),
         "labels_to_flags": attr.string_dict(mandatory = True),  # string keys instead of label_keyed because Bazel doesn't support parsing wildcard target patterns (..., *, :all) in BUILD attributes.
         "exclude_external_sources": attr.bool(default = False),
         "exclude_headers": attr.string(values = ["all", "external", ""]),  # "" needed only for compatibility with Bazel < 3.6.0