Skip to content

Commit

Permalink
add colno/end_colno and end_lineno properties to JSON5Token. restructu…
Browse files Browse the repository at this point in the history
…re JSONObject to store keys and values separately

This change removes `KeyValuePair` as a Node object.
Instead, the underlying data model will store a list of keys and list of values in separate attributes.
A new `key_value_pairs` attribute on `JSONObject` returns a list of named tuples (`typing.NamedTuple`),
each of which provides the `key` and `value` attributes.

This change brings more congruity with how the stdlib `ast` module handles Python dictionaries, to which
Python users may be more accustomed. It also maintains, by interface, the same structure for JSON5
members (https://spec.json5.org/#prod-JSON5MemberList) with the new key_value_pairs attribute (although it itself is not a Node).
  • Loading branch information
spyoungtech committed Aug 3, 2023
1 parent d11ae38 commit 52164f2
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 54 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: ^(tests/.*)
- repo: https://github.com/asottile/reorder-python-imports
rev: v3.10.0
hooks:
Expand Down Expand Up @@ -44,3 +45,4 @@ repos:
args:
- "--ignore"
- "E501,E704,E301,W503,F405,F811,F821,F403,"
exclude: ^(tests/.*)
5 changes: 3 additions & 2 deletions json5/dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,9 @@ def json_object_to_json(self, node: JSONObject) -> Any:
self.env.write('{')
if node.leading_wsc:
self.process_leading_wsc(node)
num_pairs = len(node.key_value_pairs)
for index, kvp in enumerate(node.key_value_pairs, start=1):
key_value_pairs = node.key_value_pairs
num_pairs = len(key_value_pairs)
for index, kvp in enumerate(key_value_pairs, start=1):
self.dump(kvp.key)
self.env.write(':')
self.dump(kvp.value)
Expand Down
80 changes: 33 additions & 47 deletions json5/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import deque
from typing import Any
from typing import Literal
from typing import NamedTuple

from .tokenizer import JSON5Token

Expand Down Expand Up @@ -35,6 +36,11 @@
]


class KeyValuePair(NamedTuple):
    """Lightweight ``(key, value)`` record for one member of a JSON5 object.

    This is a plain named tuple, not a ``Node`` subclass, so it carries no
    token or position information of its own. Instances are built on demand
    by ``JSONObject.key_value_pairs`` from the object's parallel ``keys`` and
    ``values`` lists.
    """

    # Node for the member name (the part written before the colon).
    key: Key
    # Node for the member value (the part written after the colon).
    value: Value


def walk(root: Node) -> typing.Generator[Node, None, None]:
todo = deque([root])
while todo:
Expand Down Expand Up @@ -76,25 +82,13 @@ def __init__(self, tok: JSON5Token | None = None, end_tok: JSON5Token | None = N
def col_offset(self) -> int | None:
if self._tok is None:
return None
return self._tok.index
return self._tok.colno

@property
def end_col_offset(self) -> int | None:
if self._end_tok is None:
return None

# TODO fix these cases in the tokenizer
if isinstance(self, (DoubleQuotedString, SingleQuotedString)):
if '\n' in self.raw_value:
return len(self.raw_value.rsplit('\n', 1)[-1])
else:
return self._end_tok.end
elif isinstance(self, BlockComment):
if '\n' in self.value:
return len(self.value.rsplit('\n', 1)[-1])
else:
return self._end_tok.end
return self._end_tok.end
return self._end_tok.end_colno

@property
def lineno(self) -> int | None:
Expand All @@ -107,14 +101,14 @@ def end_lineno(self) -> int | None:
if self._end_tok is None:
return None
r = self._end_tok.lineno
# TODO fix these cases in the tokenizer
if isinstance(self, (DoubleQuotedString, SingleQuotedString)):
return r + self.raw_value.count('\n')
elif isinstance(self, BlockComment):
return r + self.value.count('\n')
# # TODO fix these cases in the tokenizer
# if isinstance(self, (DoubleQuotedString, SingleQuotedString)):
# return r + self.raw_value.count('\n')
# elif isinstance(self, BlockComment):
# return r + self.value.count('\n')
return r

def __repr__(self) -> str:
def __str__(self) -> str:
rep = (
f"{self.__class__.__name__}("
+ ", ".join(
Expand Down Expand Up @@ -157,16 +151,26 @@ def __init__(
tok: JSON5Token | None = None,
end_tok: JSON5Token | None = None,
):
kvps = list(key_value_pairs)
for kvp in kvps:
assert isinstance(kvp, KeyValuePair), f"Expected key value pair, got {type(kvp)}"
keys: list[Key] = []
values: list[Value] = []
for key, value in key_value_pairs:
assert isinstance(key, Key)
assert isinstance(value, Value)
keys.append(key)
values.append(value)
assert len(keys) == len(values)
self.keys: list[Key] = keys
self.values: list[Value] = values
assert leading_wsc is None or all(isinstance(item, str) or isinstance(item, Comment) for item in leading_wsc)
self.key_value_pairs: list[KeyValuePair] = kvps
self.trailing_comma: TrailingComma | None = trailing_comma
self.leading_wsc: list[str | Comment] = leading_wsc or []

super().__init__(tok=tok, end_tok=end_tok)

@property
def key_value_pairs(self) -> list[KeyValuePair]:
    """Return the object's members as a fresh list of ``KeyValuePair`` tuples.

    The list is rebuilt on every access by pairing ``self.keys`` with
    ``self.values`` positionally, so callers that need it more than once
    should hold on to the result rather than re-reading the property.
    """
    return [KeyValuePair(member_key, member_value) for member_key, member_value in zip(self.keys, self.values)]


class JSONArray(Value):
def __init__(
Expand All @@ -188,16 +192,6 @@ def __init__(
super().__init__(tok=tok, end_tok=end_tok)


class KeyValuePair(Node):
def __init__(self, key: Key, value: Value, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None):
assert isinstance(key, Key)
assert isinstance(value, Value)
self.key: Key = key
self.value: Value = value

super().__init__(tok=tok, end_tok=end_tok)


class Identifier(Key):
def __init__(
self, name: str, raw_value: str | None = None, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
Expand Down Expand Up @@ -301,10 +295,6 @@ def const(self) -> Literal['NaN']:


class String(Value, Key):
...


class DoubleQuotedString(String):
def __init__(
self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
):
Expand All @@ -316,16 +306,12 @@ def __init__(
super().__init__(tok=tok, end_tok=tok)


class SingleQuotedString(String):
def __init__(
self, characters: str, raw_value: str, tok: JSON5Token | None = None, end_tok: JSON5Token | None = None
):
assert isinstance(raw_value, str)
assert isinstance(characters, str)
self.characters: str = characters
self.raw_value: str = raw_value
class DoubleQuotedString(String):
...

super().__init__(tok=tok, end_tok=tok)

class SingleQuotedString(String):
...


class BooleanLiteral(Value):
Expand Down
6 changes: 3 additions & 3 deletions json5/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ def first_key_value_pair(self, p: T_FirstKeyValuePairProduction) -> KeyValuePair
value.wsc_before.append(wsc)
for wsc in p.wsc2:
value.wsc_after.append(wsc)
return KeyValuePair(key=p.key, value=p.value, tok=key._tok, end_tok=value._end_tok)
return KeyValuePair(key=p.key, value=p.value)

@_('object_delimiter_seen COMMA { wsc } [ first_key_value_pair ]')
def subsequent_key_value_pair(self, p: SubsequentKeyValuePairProduction) -> KeyValuePair | TrailingComma:
Expand Down Expand Up @@ -340,14 +340,14 @@ def seen_RBRACE(self, p: Any) -> None:
@_('seen_LBRACE LBRACE { wsc } [ key_value_pairs ] seen_RBRACE RBRACE')
def json_object(self, p: T_JsonObjectProduction) -> JSONObject:
if not p.key_value_pairs:
node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[0], end_tok=p._slice[5])
node = JSONObject(leading_wsc=list(p.wsc or []), tok=p._slice[1], end_tok=p._slice[5])
else:
kvps, trailing_comma = p.key_value_pairs
node = JSONObject(
*kvps,
trailing_comma=trailing_comma,
leading_wsc=list(p.wsc or []),
tok=p._slice[0],
tok=p._slice[1],
end_tok=p._slice[5],
)

Expand Down
24 changes: 22 additions & 2 deletions json5/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@ def __init__(self, tok: Token, doc: str):
self.doc: str = doc
self.end: int = tok.end

@property
def colno(self) -> int:
    """Return the zero-based column at which this token starts.

    The column is measured from the character following the nearest newline
    that precedes ``self.index`` in ``self.doc``.
    """
    preceding_newline = self.doc.rfind('\n', 0, self.index)
    # rfind yields -1 when no newline precedes the token, which makes the
    # line start 0, i.e. the beginning of the document.
    return self.index - (preceding_newline + 1)

@property
def end_colno(self) -> int:
    """Return the zero-based column just past this token's last character.

    The column is measured on the line where the token *ends*, so a token
    that spans a newline (for example a string with a line continuation)
    reports its offset within its final line. This matches the convention
    of ``end_col_offset`` in the stdlib ``ast`` module. For a token that
    sits on a single line the result equals ``colno`` plus the token length.
    """
    # Search up to self.end rather than self.index so that any newline
    # inside the token itself moves the reference point to the last line.
    end_line_start = self.doc.rfind('\n', 0, self.end) + 1
    return self.end - end_line_start

@property
def end_lineno(self) -> int:
    """Return the line number on which this token ends.

    This is the token's starting ``lineno`` advanced by one for every
    newline character contained in the token's ``value``.
    """
    spanned_newlines = self.value.count('\n')
    return self.lineno + spanned_newlines

__slots__ = ('type', 'value', 'lineno', 'index', 'doc', 'end')

def __str__(self) -> str:
Expand Down Expand Up @@ -90,8 +103,15 @@ def tokenize(self, text: str, lineno: int = 1, index: int = 0) -> Generator[JSON
COLON = r"\:"
COMMA = r"\,"

DOUBLE_QUOTE_STRING = r'"(?:[^"\\]|\\.)*"'
SINGLE_QUOTE_STRING = r"'(?:[^'\\]|\\.)*'"
@_(r'"(?:[^"\\]|\\.)*"')
def DOUBLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token:
    # Token action for double-quoted strings: the matched text may contain
    # newline characters, so advance the lexer's line counter by that many
    # lines before handing the token back unchanged.
    newline_count = tok.value.count('\n')
    self.lineno = self.lineno + newline_count
    return tok

@_(r"'(?:[^'\\]|\\.)*'")
def SINGLE_QUOTE_STRING(self, tok: JSON5Token) -> JSON5Token:
    # Token action for single-quoted strings: the matched text may contain
    # newline characters, so advance the lexer's line counter by that many
    # lines before handing the token back unchanged.
    newline_count = tok.value.count('\n')
    self.lineno = self.lineno + newline_count
    return tok

LINE_COMMENT = r"//[^\n]*"

Expand Down
47 changes: 47 additions & 0 deletions tests/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import ast

import pytest

import json5.loader
import json5.model

# Fixture text that is parsed twice: once as JSON5 by the model loader and once
# as Python source by ``ast.parse``, so node positions can be compared.
# NOTE(review): this literal is not a raw string, so Python consumes the
# backslash-newline after "this line has a " and the parsed text holds that
# value on a single line -- confirm whether a raw string was intended.
TEST_TEXT = '''\
{
"string_on_same_line": "string on same line",
"multiline_dq_string": "this line has a \
continuation",
"leadingDecimalPoint": .8675309 ,
"andTrailing": 8675309.,
"trailingComma": 'in objects',
"backwardsCompatible": "with JSON",
}
'''

# Node kinds left out of the comparison on each side (presumably because they
# have no positional counterpart in the other tree -- verify if this changes).
_SKIPPED_AST_TYPES = (ast.Expr, ast.Load, ast.Module, ast.UnaryOp)
_SKIPPED_JSON5_TYPES = (json5.model.TrailingComma, json5.model.JSONText)

model = json5.loads(TEST_TEXT, loader=json5.loader.ModelLoader())
tree = ast.parse(TEST_TEXT)

ast_nodes = [candidate for candidate in ast.walk(tree) if not isinstance(candidate, _SKIPPED_AST_TYPES)]
json5_nodes = [candidate for candidate in json5.model.walk(model) if not isinstance(candidate, _SKIPPED_JSON5_TYPES)]

# The parametrized test pairs the two lists positionally, so they must line up.
assert len(ast_nodes) == len(json5_nodes)


@pytest.mark.parametrize('ast_node, json5_node', list(zip(ast_nodes, json5_nodes)))
@pytest.mark.parametrize('attr_name', ['col_offset', 'end_col_offset', 'lineno', 'end_lineno'])
def test_node_attribute_accuracy(attr_name: str, ast_node, json5_node):
    """Check one position attribute of a JSON5 node against its ``ast`` twin.

    Runs once for every (ast node, json5 node) pair and every position
    attribute name; the two values must be equal.
    """
    actual = getattr(json5_node, attr_name)
    expected = getattr(ast_node, attr_name)
    assert actual == expected, f'{attr_name} did not match {ast_node!r}, {json5_node!r}'

0 comments on commit 52164f2

Please sign in to comment.