Skip to content

Fix mypy errors in src/codegen/gscli/generate directory #994

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions src/codegen/gscli/backend/typestub_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
import re
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor
from typing import TypeVar, Union

import astor

from codegen.shared.logging.get_logger import get_logger

logger = get_logger(__name__)

# Define a type variable for AST nodes
ASTNode = TypeVar("ASTNode", ast.FunctionDef, ast.AnnAssign, ast.Assign)


class MethodRemover(ast.NodeTransformer):
def __init__(self, conditions: list[Callable[[ast.FunctionDef], bool]]):
Expand All @@ -19,12 +23,12 @@
body = []

for child in node.body:
if not self.should_remove(child):

Check failure on line 26 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "should_remove" of "MethodRemover" has incompatible type "stmt"; expected "FunctionDef | AnnAssign" [arg-type]
body.append(child)
else:
logger.debug("removing", child.name)

Check failure on line 29 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: "stmt" has no attribute "name" [attr-defined]
node.body = body
return self.generic_visit(node)

Check failure on line 31 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "AST", expected "ClassDef") [return-value]

def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef | None:
body = []
Expand All @@ -34,7 +38,7 @@
else:
logger.debug("removing", child.name)
node.body = body
return self.generic_visit(node)

Check failure on line 41 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "AST", expected "FunctionDef | None") [return-value]

def should_remove(self, node: ast.FunctionDef | ast.AnnAssign) -> bool:
if isinstance(node, ast.FunctionDef):
Expand All @@ -44,22 +48,22 @@


class FieldRemover(ast.NodeTransformer):
def __init__(self, conditions: list[Callable[[ast.FunctionDef], bool]]):
def __init__(self, conditions: list[Callable[[Union[ast.AnnAssign, ast.Assign]], bool]]):
self.conditions = conditions

def visit_ClassDef(self, node: ast.ClassDef) -> ast.ClassDef:
body = []
for child in node.body:
if not self.should_remove(child):

Check failure on line 57 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Argument 1 to "should_remove" of "FieldRemover" has incompatible type "stmt"; expected "AnnAssign | Assign" [arg-type]
body.append(child)
else:
if isinstance(child, ast.AnnAssign):
logger.debug("removing", child.target.id)

Check failure on line 61 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Item "Attribute" of "Name | Attribute | Subscript" has no attribute "id" [union-attr]

Check failure on line 61 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Item "Subscript" of "Name | Attribute | Subscript" has no attribute "id" [union-attr]
if isinstance(child, ast.Assign):
for target in child.targets:
logger.debug("removing", target.id)

Check failure on line 64 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: "expr" has no attribute "id" [attr-defined]
node.body = body
return self.generic_visit(node)

Check failure on line 66 in src/codegen/gscli/backend/typestub_utils.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "AST", expected "ClassDef") [return-value]

def should_remove(self, node: ast.AnnAssign | ast.Assign) -> bool:
if isinstance(node, ast.AnnAssign):
Expand All @@ -79,20 +83,22 @@
return astor.to_source(modified_tree)


def _remove_fields(source: str, conditions: list[Callable[[ast.FunctionDef], bool]]) -> str:
def _remove_fields(source: str, conditions: list[Callable[[Union[ast.AnnAssign, ast.Assign]], bool]]) -> str:
tree = ast.parse(source)
transformer = FieldRemover(conditions)
modified_tree = transformer.visit(tree)
return astor.to_source(modified_tree)


def _starts_with_underscore(node: ast.FunctionDef | ast.AnnAssign | ast.Assign) -> bool:
def _starts_with_underscore(node: Union[ast.FunctionDef, ast.AnnAssign, ast.Assign]) -> bool:
if isinstance(node, ast.FunctionDef):
return node.name.startswith("_") and (not node.name.startswith("__") and not node.name.endswith("__"))
elif isinstance(node, ast.Assign):
return node.targets[0].id.startswith("_")
if isinstance(node.targets[0], ast.Name):
return node.targets[0].id.startswith("_")
elif isinstance(node, ast.AnnAssign):
return node.target.id.startswith("_")
if isinstance(node.target, ast.Name):
return node.target.id.startswith("_")
return False


Expand Down Expand Up @@ -121,7 +127,9 @@
_has_decorator("noapidoc"),
]

modified_content = _remove_fields(original_content, [_starts_with_underscore])
# Bind _starts_with_underscore to a local name before passing it to _remove_fields (plain assignment; no typing.cast is applied)
field_condition = _starts_with_underscore
modified_content = _remove_fields(original_content, [field_condition])
modified_content = _remove_methods(modified_content, conditions)

if modified_content.strip().endswith(":"):
Expand Down
14 changes: 9 additions & 5 deletions src/codegen/gscli/generate/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import re
import shutil
import sys
from typing import Any

import click
from termcolor import colored
Expand Down Expand Up @@ -66,7 +68,7 @@ def _generate_codebase_typestubs() -> None:
# Right now this command must be run from a directory ending with 'codegen/codegen-backend'
if not initial_dir.endswith("codegen/codegen-backend"):
print(colored("Error: Must be in a directory ending with 'codegen/codegen-backend'", "red"))
exit(1)
sys.exit(1)

out_dir = os.path.abspath(os.path.join(initial_dir, "typings"))
frontend_typestubs_dir = os.path.abspath(os.path.join(initial_dir, os.pardir, "codegen-frontend/assets/typestubs/graphsitter"))
Expand Down Expand Up @@ -113,6 +115,7 @@ def generate_docs(docs_dir: str) -> None:
@generate.command()
@click.argument("filepath", default=sdk.__path__[0] + "/system-prompt.txt", required=False)
def system_prompt(filepath: str) -> None:
"""Generate the system prompt and write it to the specified file"""
print(f"Generating system prompt and writing to {filepath}...")
new_system_prompt = get_system_prompt()
with open(filepath, "w") as f:
Expand All @@ -121,6 +124,7 @@ def system_prompt(filepath: str) -> None:


def get_snippet_pattern(target_name: str) -> str:
"""Generate a regex pattern to match code snippets with the given target name"""
pattern = rf"\[//\]: # \(--{re.escape(target_name)}--\)\s*(?:\[//\]: # \(--{re.escape(AUTO_GENERATED_COMMENT)}--\)\s*)?"
pattern += CODE_SNIPPETS_REGEX
return pattern
Expand Down Expand Up @@ -153,9 +157,9 @@ def generate_codegen_sdk_docs(docs_dir: str) -> None:

# Write the generated docs to the file system, splitting between core, python, and typescript
# keep track of where we put each one so we can update the mint.json
python_set = set()
typescript_set = set()
core_set = set()
python_set: set[str] = set()
typescript_set: set[str] = set()
core_set: set[str] = set()
# TODO replace this with new `get_mdx_for_class` function
for class_doc in gs_docs.classes:
class_name = class_doc.title
Expand All @@ -178,7 +182,7 @@ def generate_codegen_sdk_docs(docs_dir: str) -> None:
# Update the core, python, and typescript page sets in mint.json
mint_file_path = os.path.join(docs_dir, "mint.json")
with open(mint_file_path) as mint_file:
mint_data = json.load(mint_file)
mint_data: dict[str, Any] = json.load(mint_file)

# Find the "API Reference" group where we want to add the pages
codebase_sdk_group = next(group for group in mint_data["navigation"] if group["group"] == "API Reference")
Expand Down
8 changes: 4 additions & 4 deletions src/codegen/gscli/generate/runner_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@
)


def fix_ruff_imports(objects: list[DocumentedObject]):
def fix_ruff_imports(objects: list[DocumentedObject]) -> None:
root, _ = split_git_path(str(Path(__file__)))
to_add = []
to_add: list[str] = []
for obj in objects:
to_add.append(f"{obj.module}.{obj.name}")
generics = tomlkit.array()
Expand All @@ -62,16 +62,16 @@
generics.add_line(indent="")
config = Path(root) / "ruff.toml"
toml_config = tomlkit.parse(config.read_text())
toml_config["lint"]["pyflakes"]["extend-generics"] = generics

Check failure on line 65 in src/codegen/gscli/generate/runner_imports.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "Item | Container" is not indexable [index]
config.write_text(tomlkit.dumps(toml_config))


def get_runner_imports(include_codegen=True, include_private_imports: bool = True) -> str:
def get_runner_imports(include_codegen: bool = True, include_private_imports: bool = True) -> str:
# get the imports from the apidoc, py_apidoc, and ts_apidoc
gs_objects = get_documented_objects()
gs_public_objects = list(chain(gs_objects["apidoc"], gs_objects["py_apidoc"], gs_objects["ts_apidoc"]))
fix_ruff_imports(gs_public_objects)
gs_public_imports = {f"from {obj.module} import {obj.name}" for obj in gs_public_objects}
gs_public_imports: set[str] = {f"from {obj.module} import {obj.name}" for obj in gs_public_objects}

# construct import string with all imports
ret = IMPORT_STRING_TEMPLATE.format(
Expand Down
4 changes: 2 additions & 2 deletions src/codegen/gscli/generate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ class LanguageType(StrEnum):
BOTH = "BOTH"


def generate_builtins_file(path_to_builtins: str, language_type: LanguageType):
def generate_builtins_file(path_to_builtins: str, language_type: LanguageType) -> None:
"""Generates and writes the builtins file"""
documented_imports = get_documented_objects()
all_objects = chain(documented_imports["apidoc"], documented_imports["py_apidoc"], documented_imports["ts_apidoc"])
unique_imports = {f"from {obj.module} import {obj.name} as {obj.name}" for obj in all_objects}
unique_imports: set[str] = {f"from {obj.module} import {obj.name} as {obj.name}" for obj in all_objects}
all_imports = "\n".join(sorted(unique_imports))
# TODO: re-use code with runner_imports list
# TODO: also auto generate import string for CodemodContext + MessageType
Expand Down