Skip to content

Commit

Permalink
model improvements
Browse files Browse the repository at this point in the history
Make tokens nonpublic attributes of model nodes.
Expose lineno/end_lineno/col_offset/end_col_offset on nodes
Implement walk function to walk a node tree
  • Loading branch information
spyoungtech committed Aug 3, 2023
1 parent 700e89e commit d11ae38
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 97 deletions.
196 changes: 148 additions & 48 deletions json5/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

import math
import typing
from collections import deque
from typing import Any
from typing import Literal

Expand Down Expand Up @@ -33,30 +35,109 @@
]


def walk(root: Node) -> typing.Generator[Node, None, None]:
    """Yield ``root`` and then every node reachable from it, breadth-first.

    A FIFO queue drives the traversal: each node is taken from the left of
    the queue and its direct children (as reported by ``iter_child_nodes``)
    are appended on the right.
    """
    pending = deque((root,))
    while pending:
        current: Node = pending.popleft()
        # Children are queued *before* the node is handed to the caller.
        for child in iter_child_nodes(current):
            pending.append(child)
        yield current


def iter_child_nodes(node: Node) -> typing.Generator[Node, None, None]:
    """Yield the direct child nodes of ``node``.

    A field contributes children in two ways: its value is itself a ``Node``,
    or its value is a ``list``, in which case every ``Node`` element of that
    list is yielded (non-node elements are ignored). Any other field value
    is skipped.
    """
    for _name, field_value in iter_fields(node):
        if isinstance(field_value, Node):
            yield field_value
            continue
        if not isinstance(field_value, list):
            continue
        yield from (element for element in field_value if isinstance(element, Node))


def iter_fields(node: Node) -> typing.Generator[tuple[str, Any], None, None]:
    """Yield ``(field_name, value)`` for each name listed in ``node._fields``.

    Names in ``node._fields`` for which attribute lookup raises
    ``AttributeError`` are skipped.

    Only the attribute lookup is guarded. The ``yield`` is deliberately kept
    outside the ``try`` so that an ``AttributeError`` thrown into the
    generator at the yield point propagates instead of being swallowed.
    """
    for field_name in node._fields:
        try:
            value = getattr(node, field_name)
        except AttributeError:
            # The field is listed but not present on this node: skip it.
            continue
        yield field_name, value


class Node:
    """Base class for model nodes.

    Every node records the whitespace/comments surrounding it
    (``wsc_before`` / ``wsc_after``) and, optionally, the first and last
    tokens it was built from. The tokens are kept in the non-public
    ``_tok`` / ``_end_tok`` attributes and surfaced through the read-only
    ``lineno``, ``col_offset``, ``end_lineno`` and ``end_col_offset``
    properties, each of which is ``None`` when the relevant token is missing.
    """

    # Attribute names filtered out of ``__repr__`` and of the first part of
    # ``_fields`` (which re-appends ``wsc_before``/``wsc_after`` at the end).
    excluded_names = ['excluded_names', 'wsc_before', 'wsc_after', 'leading_wsc', 'tok', 'end_tok']

    def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        # Whitespace/Comments before/after the node
        self.wsc_before: list[str | Comment] = []
        self.wsc_after: list[str | Comment] = []
        self._tok: JSON5Token | None = tok
        self._end_tok: JSON5Token | None = end_tok

    def _multiline_text(self) -> str | None:
        """Return the text used for multi-line end-position fix-ups, if any.

        Quoted strings use ``raw_value``, block comments use ``value``;
        every other node kind returns ``None``.
        """
        # TODO fix these cases in the tokenizer
        if isinstance(self, (DoubleQuotedString, SingleQuotedString)):
            return self.raw_value
        if isinstance(self, BlockComment):
            return self.value
        return None

    @property
    def col_offset(self) -> int | None:
        """``index`` of the start token, or ``None`` without a start token."""
        start = self._tok
        return None if start is None else start.index

    @property
    def end_col_offset(self) -> int | None:
        """``end`` of the end token, or ``None`` without an end token.

        For quoted strings and block comments whose text contains a newline,
        the length of the text after the last newline is returned instead.
        """
        last = self._end_tok
        if last is None:
            return None
        text = self._multiline_text()
        if text is not None and '\n' in text:
            return len(text.rsplit('\n', 1)[-1])
        return last.end

    @property
    def lineno(self) -> int | None:
        """``lineno`` of the start token, or ``None`` without a start token."""
        start = self._tok
        return None if start is None else start.lineno

    @property
    def end_lineno(self) -> int | None:
        """``lineno`` of the end token, or ``None`` without an end token.

        For quoted strings and block comments, the number of newlines in the
        node's text is added to the token's line number.
        """
        last = self._end_tok
        if last is None:
            return None
        text = self._multiline_text()
        extra_lines = 0 if text is None else text.count('\n')
        return last.lineno + extra_lines

    def __repr__(self) -> str:
        shown = ", ".join(
            f"{key}={value!r}"
            for key, value in self.__dict__.items()
            if not key.startswith('_') and key not in self.excluded_names
        )
        return f"{self.__class__.__name__}({shown})"

    @property
    def _fields(self) -> list[str]:
        """Public, non-excluded instance attribute names, then the two wsc lists."""
        names = [
            name for name in self.__dict__ if not name.startswith('_') and name not in self.excluded_names
        ]
        names += ['wsc_before', 'wsc_after']
        return names


class JSONText(Node):
    """Node that wraps a single ``Value``.

    :param value: the wrapped value; must be a ``Value`` instance.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(self, value: Value, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        assert isinstance(value, Value)
        self.value: Value = value
        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)


class Value(Node):
Expand All @@ -74,6 +155,7 @@ def __init__(
trailing_comma: TrailingComma | None = None,
leading_wsc: list[str | Comment] | None = None,
tok: JSON5Token | None = None,
end_tok: JSON5Token | None = None,
):
kvps = list(key_value_pairs)
for kvp in kvps:
Expand All @@ -82,8 +164,8 @@ def __init__(
self.key_value_pairs: list[KeyValuePair] = kvps
self.trailing_comma: TrailingComma | None = trailing_comma
self.leading_wsc: list[str | Comment] = leading_wsc or []
self.tok: JSON5Token | None = tok
super().__init__()

super().__init__(tok=tok, end_tok=end_tok)


class JSONArray(Value):
Expand All @@ -93,6 +175,7 @@ def __init__(
trailing_comma: TrailingComma | None = None,
leading_wsc: list[str | Comment] | None = None,
tok: JSON5Token | None = None,
end_tok: JSON5Token | None = None,
):
vals = list(values)
for value in vals:
Expand All @@ -101,31 +184,33 @@ def __init__(
self.values: list[Value] = vals
self.trailing_comma: TrailingComma | None = trailing_comma
self.leading_wsc: list[str | Comment] = leading_wsc or []
self.tok: JSON5Token | None = tok
super().__init__()

super().__init__(tok=tok, end_tok=end_tok)


class KeyValuePair(Node):
    """Pairing of a ``Key`` node with a ``Value`` node.

    :param key: must be a ``Key`` instance.
    :param value: must be a ``Value`` instance.
    :param tok: first token of the pair, if known.
    :param end_tok: last token of the pair, if known.
    """

    def __init__(self, key: Key, value: Value, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        # Validate both halves (key first) before storing anything.
        for argument, expected_type in ((key, Key), (value, Value)):
            assert isinstance(argument, expected_type)
        self.key: Key = key
        self.value: Value = value

        super().__init__(tok, end_tok)


class Identifier(Key):
    """Key node holding an identifier name.

    :param name: the identifier; must be a non-empty ``str``.
    :param raw_value: source text of the identifier; defaults to ``name``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(
        self, name: str, raw_value: str | None = None, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
    ):
        assert isinstance(name, str)
        if raw_value is None:
            raw_value = name
        assert isinstance(raw_value, str)
        assert len(name) > 0
        self.name: str = name
        self.raw_value: str = raw_value

        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)

    def __hash__(self) -> int:
        # Hash by name only; ``raw_value`` does not take part.
        return hash(self.name)
Expand All @@ -139,7 +224,14 @@ class Number(Value):


class Integer(Number):
def __init__(self, raw_value: str, is_hex: bool = False, is_octal: bool = False, tok: JSON5Token | None = None):
def __init__(
self,
raw_value: str,
is_hex: bool = False,
is_octal: bool = False,
tok: JSON5Token | None = None,
end_tok: JSON5Token | None = None,
):
assert isinstance(raw_value, str)
if is_hex and is_octal:
raise ValueError("is_hex and is_octal are mutually exclusive")
Expand All @@ -156,26 +248,32 @@ def __init__(self, raw_value: str, is_hex: bool = False, is_octal: bool = False,
self.raw_value: str = raw_value
self.is_hex: bool = is_hex
self.is_octal: bool = is_octal
self.tok: JSON5Token | None = tok
super().__init__()

super().__init__(tok=tok, end_tok=end_tok or tok)


class Float(Number):
    """Number node for a floating-point literal.

    :param raw_value: source text of the number; must be accepted by ``float()``.
    :param exp_notation: ``'e'``, ``'E'`` or ``None``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(
        self,
        raw_value: str,
        exp_notation: str | None = None,
        tok: JSON5Token | None = None,
        end_tok: JSON5Token | None = None,
    ):
        # Parse first so an unparsable ``raw_value`` fails before anything is stored.
        parsed = float(raw_value)
        assert exp_notation in (None, 'e', 'E')
        self.raw_value: str = raw_value
        self.exp_notation: str | None = exp_notation
        self.value: float = parsed

        last_token = end_tok or tok
        super().__init__(tok=tok, end_tok=last_token)


class Infinity(Number):
def __init__(self, negative: bool = False, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
    """Create the node.

    :param negative: stored as ``self.negative``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """
    self.negative: bool = negative

    # Honour an explicitly supplied end token instead of silently
    # discarding it; with no ``end_tok`` this is the previous behaviour.
    super().__init__(tok=tok, end_tok=end_tok or tok)

@property
def value(self) -> float:
Expand All @@ -190,9 +288,8 @@ def const(self) -> Literal['Infinity', '-Infinity']:


class NaN(Number):
def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
    """Create the node.

    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """
    # Honour an explicitly supplied end token instead of silently
    # discarding it; with no ``end_tok`` this is the previous behaviour.
    super().__init__(tok=tok, end_tok=end_tok or tok)

@property
def value(self) -> float:
Expand All @@ -208,63 +305,66 @@ class String(Value, Key):


class DoubleQuotedString(String):
    """String node for a double-quoted string literal.

    :param characters: stored as ``self.characters``; must be a ``str``.
    :param raw_value: stored as ``self.raw_value``; must be a ``str``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(
        self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
    ):
        assert isinstance(raw_value, str)
        assert isinstance(characters, str)
        self.characters: str = characters
        self.raw_value: str = raw_value

        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)


class SingleQuotedString(String):
    """String node for a single-quoted string literal.

    :param characters: stored as ``self.characters``; must be a ``str``.
    :param raw_value: stored as ``self.raw_value``; must be a ``str``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(
        self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
    ):
        assert isinstance(raw_value, str)
        assert isinstance(characters, str)
        self.characters: str = characters
        self.raw_value: str = raw_value

        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)


class BooleanLiteral(Value):
    """Value node for a boolean literal.

    :param value: must compare equal to ``True`` or ``False``.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    def __init__(self, value: bool, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        assert value in (True, False)
        self.value: bool = value

        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)


class NullLiteral(Value):
    """Value node for the ``null`` literal; its ``value`` is always ``None``.

    :param tok: first token of the node, if known.
    :param end_tok: last token of the node; falls back to ``tok`` when omitted.
    """

    # Class-level constant: every instance shares the same value.
    value = None

    def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        # Honour an explicitly supplied end token instead of silently
        # discarding it; with no ``end_tok`` this is the previous behaviour.
        super().__init__(tok=tok, end_tok=end_tok or tok)


class UnaryOp(Value):
    """Value node applying a unary ``'-'`` or ``'+'`` to a ``Number`` node.

    :param op: the operator, ``'-'`` or ``'+'``.
    :param value: the operand; must be a ``Number`` instance.
    :param tok: first token of the node, if known.
    :param end_tok: last token of the node, if known.
    """

    def __init__(
        self, op: Literal['-', '+'], value: Number, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
    ):
        # Operator is validated before the operand.
        assert op in ('-', '+')
        assert isinstance(value, Number)
        self.op: Literal['-', '+'] = op
        self.value: Number = value

        super().__init__(tok, end_tok)


class TrailingComma(Node):
    """Node for a trailing comma.

    ``end_tok`` is accepted but not used: the start token doubles as the end
    token.
    """

    def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        # Trailing comma is always a single COMMA token
        super().__init__(tok, tok)


class Comment(Node):
    """Node holding comment text in ``value``.

    ``end_tok`` is accepted but not used: the start token doubles as the end
    token.

    :param value: the comment text; must be a ``str``.
    :param tok: the comment's token, if known.
    """

    def __init__(self, value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
        assert isinstance(value, str), f"Expected str got {type(value)}"
        self.value: str = value
        # Comments are always a single token
        super().__init__(tok, tok)


class LineComment(Comment):
Expand Down
Loading

0 comments on commit d11ae38

Please sign in to comment.