Skip to content

Commit 7551c7e

Browse files
committedFeb 26, 2025
sql: add jsonpath parser
This is a prototype for a minimal jsonpath parser. It currently supports a small set of features: setting jsonpath mode (strict/lax), root ($), key accessors (.key_name), and array wildcards ([*]). This is standalone and doesn't integrate with the database. Part of: cockroachdb#22513 Release note: None

25 files changed

+938
-13
lines changed
 

‎BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ exports_files([
124124
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/keywords.go
125125
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/tokens.go
126126
# gazelle:exclude pkg/sql/plpgsql/parser/lexbase/reserved_keywords.go
127+
# gazelle:exclude pkg/util/jsonpath/parser/lexbase/keywords.go
128+
# gazelle:exclude pkg/util/jsonpath/parser/lexbase/tokens.go
127129
# gazelle:exclude pkg/sql/scanner/token_names_test.go
128130
# gazelle:exclude pkg/sql/schemachanger/scexec/mocks_generated_test.go
129131
# gazelle:exclude pkg/cmd/prereqs/testdata

‎pkg/BUILD.bazel

+5
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,7 @@ ALL_TESTS = [
741741
"//pkg/util/json/tokenizer:tokenizer_test",
742742
"//pkg/util/json:json_disallowed_imports_test",
743743
"//pkg/util/json:json_test",
744+
"//pkg/util/jsonpath/parser:parser_test",
744745
"//pkg/util/limit:limit_test",
745746
"//pkg/util/log/eventpb:eventpb_test",
746747
"//pkg/util/log/logconfig:logconfig_test",
@@ -2581,6 +2582,10 @@ GO_TARGETS = [
25812582
"//pkg/util/json:json",
25822583
"//pkg/util/json:json_test",
25832584
"//pkg/util/jsonbytes:jsonbytes",
2585+
"//pkg/util/jsonpath/parser/lexbase:lexbase",
2586+
"//pkg/util/jsonpath/parser:parser",
2587+
"//pkg/util/jsonpath/parser:parser_test",
2588+
"//pkg/util/jsonpath:jsonpath",
25842589
"//pkg/util/keysutil:keysutil",
25852590
"//pkg/util/leaktest:leaktest",
25862591
"//pkg/util/limit:limit",

‎pkg/gen/misc.bzl

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ MISC_SRCS = [
2727
"//pkg/testutils/serverutils:ts_control_forwarder_generated.go",
2828
"//pkg/util/interval/generic:example_interval_btree.go",
2929
"//pkg/util/interval/generic:example_interval_btree_test.go",
30+
"//pkg/util/jsonpath/parser/lexbase:keywords.go",
31+
"//pkg/util/jsonpath/parser/lexbase:tokens.go",
32+
"//pkg/util/jsonpath/parser:jsonpath.go",
3033
"//pkg/util/log/channel:channel_generated.go",
3134
"//pkg/util/log/eventpb/eventpbgen:log_channels_generated.go",
3235
"//pkg/util/log/eventpb:eventlog_channels_generated.go",

‎pkg/sql/lexbase/sql-gen.sh

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
# included in the /LICENSE file.
77

88

9-
# This is used through bazel when generating sql.go and plpgsql.go.
10-
# Look at BUILD.bazel in pkg/sql/parser or pkg/plpgsql/parser for
11-
# usage.
9+
# This is used through bazel when generating sql.go, plpgsql.go, and jsonpath.go.
10+
# Look at BUILD.bazel in pkg/sql/parser, pkg/sql/plpgsql/parser, or
11+
# pkg/util/jsonpath/parser for usage.
1212

1313
set -euo pipefail
1414

@@ -23,7 +23,7 @@ GENYACC=$LANG-gen.y
2323
awk '{print $0")>_\\1 <union> /* <\\2> */_"}' > types_regex.tmp
2424

2525
sed -E -f types_regex.tmp < $1 | \
26-
if [ $LANG != plpgsql ] && [ $LANG != pgrepl ]; then \
26+
if [ $LANG != plpgsql ] && [ $LANG != pgrepl ] && [ $LANG != jsonpath ]; then \
2727
awk -f $3 | \
2828
sed -Ee 's,//.*$$,,g;s,/[*]([^*]|[*][^/])*[*]/, ,g;s/ +$$//g' > $GENYACC
2929
else

‎pkg/sql/parser/statements/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@ go_library(
88
deps = [
99
"//pkg/sql/sem/plpgsqltree",
1010
"//pkg/sql/sem/tree",
11+
"//pkg/util/jsonpath",
1112
],
1213
)

‎pkg/sql/parser/statements/statement.go

+15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package statements
88
import (
99
"github.com/cockroachdb/cockroach/pkg/sql/sem/plpgsqltree"
1010
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
11+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath"
1112
)
1213

1314
// Statement is the result of parsing a single statement. It contains the AST
@@ -60,6 +61,8 @@ type Statements []Statement[tree.Statement]
6061

6162
type PLpgStatement Statement[*plpgsqltree.Block]
6263

64+
// JsonpathStatement is the result of parsing a jsonpath expression: a
// Statement whose AST is a *jsonpath.Jsonpath.
type JsonpathStatement Statement[*jsonpath.Jsonpath]
65+
6366
// String returns the AST formatted as a string.
6467
func (stmts Statements) String() string {
6568
return stmts.StringWithFlags(tree.FmtSimple)
@@ -88,10 +91,22 @@ func (stmt PLpgStatement) StringWithFlags(flags tree.FmtFlags) string {
8891
return ctx.CloseAndGetString()
8992
}
9093

94+
// String returns the AST formatted as a string, using tree.FmtSimple.
func (stmt JsonpathStatement) String() string {
95+
return stmt.StringWithFlags(tree.FmtSimple)
96+
}
97+
98+
// StringWithFlags returns the AST formatted as a string (with the given flags).
// The AST is formatted into a fresh tree.FmtCtx created from flags, and the
// context is closed to obtain the resulting string.
99+
func (stmt JsonpathStatement) StringWithFlags(flags tree.FmtFlags) string {
100+
ctx := tree.NewFmtCtx(flags)
101+
stmt.AST.Format(ctx)
102+
return ctx.CloseAndGetString()
103+
}
104+
91105
type ParsedStmts interface {
92106
String() string
93107
StringWithFlags(flags tree.FmtFlags) string
94108
}
95109

96110
var _ ParsedStmts = Statements{}
97111
var _ ParsedStmts = PLpgStatement{}
112+
var _ ParsedStmts = JsonpathStatement{}

‎pkg/sql/plpgsql/parser/BUILD.bazel

+1-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ sh_binary(
55
srcs = ["//pkg/sql/lexbase:sql-gen.sh"],
66
)
77

8-
# Define the target to auto-generate sql.go from the grammar file.
8+
# Define the target to auto-generate plpgsql.go from the grammar file.
99
genrule(
1010
name = "plpgsql-goyacc",
1111
srcs = [
@@ -16,7 +16,6 @@ genrule(
1616
export GOPATH=/nonexist-gopath
1717
$(location :plpgsql-gen) $(location plpgsql.y) plpgsql ""\
1818
$(location plpgsql.go) $(location @org_golang_x_tools//cmd/goyacc)
19-
2019
""",
2120
tools = [
2221
":plpgsql-gen",

‎pkg/sql/scanner/BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
33
go_library(
44
name = "scanner",
55
srcs = [
6+
"jsonpath_scan.go",
67
"plpgsql_scan.go",
78
"scan.go",
89
],
@@ -11,6 +12,7 @@ go_library(
1112
deps = [
1213
"//pkg/sql/lexbase",
1314
"//pkg/sql/plpgsql/parser/lexbase",
15+
"//pkg/util/jsonpath/parser/lexbase",
1416
],
1517
)
1618

‎pkg/sql/scanner/jsonpath_scan.go

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package scanner
7+
8+
import (
9+
sqllexbase "github.com/cockroachdb/cockroach/pkg/sql/lexbase"
10+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser/lexbase"
11+
)
12+
13+
// JSONPathScanner is a scanner with a jsonpath-specific scan function.
// It embeds Scanner and adds its own Scan and scanIdent methods.
14+
type JSONPathScanner struct {
15+
Scanner
16+
}
17+
18+
// Scan scans the next token and populates its information into lval.
19+
// This scan function contains rules for jsonpath.
// A token that begins with an identifier-start character is handed to
// scanIdent; every other token is left as the single-character token that
// scanSetup already recorded in lval.
//
// NOTE(review): scanIdent goes through lowerCaseAndNormalizeIdent, whose name
// suggests the identifier text is lowercased. JSON object keys are
// case-sensitive, so confirm that key names such as "$.Abc" are not folded.
20+
func (s *JSONPathScanner) Scan(lval ScanSymType) {
21+
ch, skipWhiteSpace := s.scanSetup(lval)
22+
// Stop here when scanSetup reports there is nothing further to scan for this
// call (presumably end of input or a failure while skipping whitespace --
// TODO confirm against scanSetup).
if skipWhiteSpace {
23+
return
24+
}
25+
26+
// TODO(normanchenn): This check will not work for valid JSONPath expressions
27+
// like '$.1key'. We don't support this case yet since expressions like
28+
// '$.1e' should fail due to being interpreted as a numeric literal.
29+
if sqllexbase.IsIdentStart(ch) {
30+
s.scanIdent(lval)
31+
return
32+
}
33+
// Everything else is a single character token which we already initialized
34+
// lval for above.
35+
}
36+
37+
// isIdentMiddle returns true if the character is valid inside an identifier.
// A character qualifies when it may start an identifier or is a digit. This is
// the jsonpath-specific predicate handed to lowerCaseAndNormalizeIdent; the
// SQL and PL/pgSQL scanners pass the lexbase IsIdentMiddle function instead.
38+
func isIdentMiddle(ch int) bool {
39+
return sqllexbase.IsIdentStart(ch) || sqllexbase.IsDigit(ch)
40+
}
41+
42+
// scanIdent is similar to Scanner.scanIdent, but uses Jsonpath tokens.
// The identifier is read with the jsonpath isIdentMiddle predicate, and the
// token ID is then set from the jsonpath keyword table lookup of its text.
43+
func (s *JSONPathScanner) scanIdent(lval ScanSymType) {
44+
s.lowerCaseAndNormalizeIdent(lval, isIdentMiddle)
45+
lval.SetID(lexbase.GetKeywordID(lval.Str()))
46+
}

‎pkg/sql/scanner/plpgsql_scan.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,6 @@ func (s *PLpgSQLScanner) scanNumber(lval ScanSymType, ch int) {
433433

434434
// scanIdent is similar to Scanner.scanIdent, but uses PL/pgSQL tokens.
435435
func (s *PLpgSQLScanner) scanIdent(lval ScanSymType) {
436-
s.lowerCaseAndNormalizeIdent(lval)
436+
s.lowerCaseAndNormalizeIdent(lval, sqllex.IsIdentMiddle)
437437
lval.SetID(lexbase.GetKeywordID(lval.Str()))
438438
}

‎pkg/sql/scanner/scan.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,13 @@ type ScanSymType interface {
4747
SetUnionVal(interface{})
4848
}
4949

50-
// Scanner lexes SQL statements.
50+
// Scanner lexes statements.
5151
type Scanner struct {
5252
in string
5353
pos int
5454
bytesPrealloc []byte
5555

56-
// Comments is the list of parsed comments from the SQL statement.
56+
// Comments is the list of parsed comments from the statement.
5757
Comments []string
5858

5959
// lastAttemptedID indicates the ID of the last attempted
@@ -606,7 +606,7 @@ func (s *Scanner) ScanComment(lval ScanSymType) (present, ok bool) {
606606
return false, true
607607
}
608608

609-
func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
609+
func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType, isIdentMiddle func(int) bool) {
610610
s.lastAttemptedID = int32(lexbase.IDENT)
611611
s.pos--
612612
start := s.pos
@@ -627,7 +627,7 @@ func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
627627
isLower = false
628628
}
629629

630-
if !lexbase.IsIdentMiddle(ch) {
630+
if !isIdentMiddle(ch) {
631631
break
632632
}
633633

@@ -656,7 +656,7 @@ func (s *Scanner) lowerCaseAndNormalizeIdent(lval ScanSymType) {
656656
}
657657

658658
func (s *Scanner) scanIdent(lval ScanSymType) {
659-
s.lowerCaseAndNormalizeIdent(lval)
659+
s.lowerCaseAndNormalizeIdent(lval, lexbase.IsIdentMiddle)
660660

661661
isExperimental := false
662662
kw := lval.Str()

‎pkg/sql/sem/tree/datum.go

+3
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ var (
9393
// deriving the arguments to construct a specific time.Time.
9494
MinSupportedTime = timeutil.Unix(-210866803200, 0) // 4714-11-24 00:00:00+00 BC
9595
MinSupportedTimeSec = float64(MinSupportedTime.Unix())
96+
97+
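// ValidateJSONPath parses a jsonpath string and returns the string form of
// the parsed expression, or the parse error. It is nil until it is injected by
// the init function in pkg/util/jsonpath/parser/parse.go.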
98+
ValidateJSONPath func(string) (string, error)
9699
)
97100

98101
// CompareContext represents the dependencies used to evaluate comparisons

‎pkg/testutils/lint/lint_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1767,7 +1767,7 @@ func TestLint(t *testing.T) {
17671767
}
17681768
}
17691769

1770-
ignore := `zcgo*|\.(pb(\.gw)?)|(\.[eo]g)\.go|/testdata/|^sql/parser/sql\.go$|(_)?generated(_test)?\.go$|^sql/pgrepl/pgreplparser/pgrepl\.go$|^sql/plpgsql/parser/plpgsql\.go$`
1770+
ignore := `zcgo*|\.(pb(\.gw)?)|(\.[eo]g)\.go|/testdata/|^sql/parser/sql\.go$|(_)?generated(_test)?\.go$|^sql/pgrepl/pgreplparser/pgrepl\.go$|^sql/plpgsql/parser/plpgsql\.go$|^util/jsonpath/parser/jsonpath\.go$`
17711771
cmd, stderr, filter, err := dirCmd(pkgDir, crlfmt, "-fast", "-ignore", ignore, "-tab", "2", ".")
17721772
if err != nil {
17731773
t.Fatal(err)

‎pkg/util/jsonpath/BUILD.bazel

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "jsonpath",
5+
srcs = ["expr.go"],
6+
importpath = "github.com/cockroachdb/cockroach/pkg/util/jsonpath",
7+
visibility = ["//visibility:public"],
8+
deps = ["//pkg/sql/sem/tree"],
9+
)

‎pkg/util/jsonpath/expr.go

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package jsonpath
7+
8+
import (
9+
"fmt"
10+
"strings"
11+
12+
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
13+
)
14+
15+
// Expr is the interface satisfied by jsonpath AST nodes: a node can render
// itself as a string (fmt.Stringer) and format itself into a tree.FmtCtx
// (tree.NodeFormatter).
type Expr interface {
16+
fmt.Stringer
17+
tree.NodeFormatter
18+
}
19+
20+
// Accessor is one element of a Query's accessor list. Root, Key and Wildcard
// implement it.
// Identical to Expr for now.
21+
type Accessor interface {
22+
Expr
23+
}
24+
25+
// Jsonpath is the top-level jsonpath AST node: a query plus the mode it was
// written in. Strict is true for strict mode and false otherwise.
type Jsonpath struct {
26+
Query Query
27+
Strict bool
28+
}
29+
30+
var _ Expr = Jsonpath{}
31+
32+
// String returns the query text, prefixed with "strict " when Strict is set.
// Non-strict mode is never printed, so an explicit "lax" prefix in the input
// does not appear in the output.
func (j Jsonpath) String() string {
33+
var mode string
34+
if j.Strict {
35+
mode = "strict "
36+
}
37+
return mode + j.Query.String()
38+
}
39+
40+
// Format implements tree.NodeFormatter by writing String() to ctx. The
// formatting flags carried by ctx are not consulted here.
func (j Jsonpath) Format(ctx *tree.FmtCtx) {
41+
ctx.WriteString(j.String())
42+
}
43+
44+
// Query is an ordered list of accessors applied one after another.
type Query struct {
45+
Accessors []Accessor
46+
}
47+
48+
var _ Expr = Query{}
49+
50+
// String concatenates the string form of every accessor, in order, with no
// separator. An empty accessor list yields the empty string.
func (q Query) String() string {
51+
var sb strings.Builder
52+
for _, accessor := range q.Accessors {
53+
sb.WriteString(accessor.String())
54+
}
55+
return sb.String()
56+
}
57+
58+
// Format implements tree.NodeFormatter by writing String() to ctx.
func (q Query) Format(ctx *tree.FmtCtx) {
59+
ctx.WriteString(q.String())
60+
}
61+
62+
// Root is the root accessor, written "$".
type Root struct{}
63+
64+
var _ Accessor = Root{}
65+
66+
// String returns "$".
func (r Root) String() string { return "$" }
67+
68+
// Format implements tree.NodeFormatter by writing String() to ctx.
func (r Root) Format(ctx *tree.FmtCtx) {
69+
ctx.WriteString(r.String())
70+
}
71+
72+
// Key is a key accessor, written ".name". The Key field holds the name
// without the leading dot.
type Key struct {
73+
Key string
74+
}
75+
76+
var _ Accessor = Key{}
77+
78+
// String returns the key name prefixed with ".". The name is emitted as-is,
// without quoting or escaping.
func (k Key) String() string { return "." + k.Key }
79+
80+
// Format implements tree.NodeFormatter by writing String() to ctx.
func (k Key) Format(ctx *tree.FmtCtx) {
81+
ctx.WriteString(k.String())
82+
}
83+
84+
// Wildcard is the array wildcard accessor, written "[*]".
type Wildcard struct{}
85+
86+
var _ Accessor = Wildcard{}
87+
88+
// String returns "[*]".
func (w Wildcard) String() string { return "[*]" }
89+
90+
// Format implements tree.NodeFormatter by writing String() to ctx.
func (w Wildcard) Format(ctx *tree.FmtCtx) {
91+
ctx.WriteString(w.String())
92+
}

‎pkg/util/jsonpath/parser/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
jsonpath.go

‎pkg/util/jsonpath/parser/BUILD.bazel

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
2+
3+
sh_binary(
4+
name = "jsonpath-gen",
5+
srcs = ["//pkg/sql/lexbase:sql-gen.sh"],
6+
)
7+
8+
# Define the target to auto-generate jsonpath.go from the grammar file.
9+
genrule(
10+
name = "jsonpath-goyacc",
11+
srcs = [
12+
"jsonpath.y",
13+
],
14+
outs = ["jsonpath.go"],
15+
cmd = """
16+
export GOPATH=/nonexist-gopath
17+
$(location :jsonpath-gen) $(location jsonpath.y) jsonpath ""\
18+
$(location jsonpath.go) $(location @org_golang_x_tools//cmd/goyacc)
19+
""",
20+
tools = [
21+
":jsonpath-gen",
22+
"@org_golang_x_tools//cmd/goyacc",
23+
],
24+
visibility = ["//visibility:public"],
25+
)
26+
27+
go_library(
28+
name = "parser",
29+
srcs = [
30+
"jsonpath.go",
31+
"lexer.go",
32+
"parse.go",
33+
],
34+
importpath = "github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser",
35+
visibility = ["//visibility:public"],
36+
deps = [
37+
"//pkg/sql/parser",
38+
"//pkg/sql/parser/statements",
39+
"//pkg/sql/pgwire/pgcode",
40+
"//pkg/sql/pgwire/pgerror",
41+
"//pkg/sql/scanner",
42+
"//pkg/sql/sem/tree",
43+
"//pkg/util/jsonpath",
44+
"@com_github_cockroachdb_errors//:errors",
45+
],
46+
)
47+
48+
exports_files(
49+
[
50+
"jsonpath.y",
51+
],
52+
visibility = ["//visibility:public"],
53+
)
54+
55+
go_test(
56+
name = "parser_test",
57+
srcs = ["parser_test.go"],
58+
data = glob(["testdata/**"]),
59+
deps = [
60+
":parser",
61+
"//pkg/testutils/datapathutils",
62+
"//pkg/testutils/sqlutils",
63+
"@com_github_cockroachdb_datadriven//:datadriven",
64+
],
65+
)

‎pkg/util/jsonpath/parser/jsonpath.y

+230
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
%{
2+
package parser
3+
4+
import (
5+
"github.com/cockroachdb/cockroach/pkg/sql/scanner"
6+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath"
7+
)
8+
9+
%}
10+
11+
%{
12+
13+
// Compile-time check that *jsonpathSymType satisfies scanner.ScanSymType.
var _ scanner.ScanSymType = &jsonpathSymType{}
14+
15+
// jsonpathScanSymType is an empty method with no visible caller.
// NOTE(review): presumably a marker that distinguishes this symbol type from
// the ones generated for the other grammars -- confirm.
func (s *jsonpathSymType) jsonpathScanSymType() {}
16+
17+
// ID implements the scanner.ScanSymType interface.
18+
func (s *jsonpathSymType) ID() int32 {
19+
return s.id
20+
}
21+
22+
// SetID implements the scanner.ScanSymType interface.
23+
func (s *jsonpathSymType) SetID(id int32) {
24+
s.id = id
25+
}
26+
27+
// Pos implements the scanner.ScanSymType interface.
28+
func (s *jsonpathSymType) Pos() int32 {
29+
return s.pos
30+
}
31+
32+
// SetPos implements the scanner.ScanSymType interface.
33+
func (s *jsonpathSymType) SetPos(pos int32) {
34+
s.pos = pos
35+
}
36+
37+
// Str implements the scanner.ScanSymType interface.
38+
func (s *jsonpathSymType) Str() string {
39+
return s.str
40+
}
41+
42+
// SetStr implements the scanner.ScanSymType interface.
43+
func (s *jsonpathSymType) SetStr(str string) {
44+
s.str = str
45+
}
46+
47+
// UnionVal implements the scanner.ScanSymType interface.
48+
func (s *jsonpathSymType) UnionVal() interface{} {
49+
return s.union.val
50+
}
51+
52+
// SetUnionVal implements the scanner.ScanSymType interface.
53+
func (s *jsonpathSymType) SetUnionVal(val interface{}) {
54+
s.union.val = val
55+
}
56+
57+
// jsonpathSymUnion holds the semantic value of a grammar symbol as an untyped
// interface value. The grammar actions store into val directly and read it
// back through the typed accessors below. Each accessor uses a single-value
// type assertion, so it panics if val holds a different dynamic type.
type jsonpathSymUnion struct {
58+
val interface{}
59+
}
60+
61+
// expr returns val as a jsonpath.Expr.
func (u *jsonpathSymUnion) expr() jsonpath.Expr {
62+
return u.val.(jsonpath.Expr)
63+
}
64+
65+
// accessor returns val as a jsonpath.Accessor.
func (u *jsonpathSymUnion) accessor() jsonpath.Accessor {
66+
return u.val.(jsonpath.Accessor)
67+
}
68+
69+
// query returns val as a jsonpath.Query.
func (u *jsonpathSymUnion) query() jsonpath.Query {
70+
return u.val.(jsonpath.Query)
71+
}
72+
73+
// root returns val as a jsonpath.Root.
func (u *jsonpathSymUnion) root() jsonpath.Root {
74+
return u.val.(jsonpath.Root)
75+
}
76+
77+
// key returns val as a jsonpath.Key.
func (u *jsonpathSymUnion) key() jsonpath.Key {
78+
return u.val.(jsonpath.Key)
79+
}
80+
81+
// wildcard returns val as a jsonpath.Wildcard.
func (u *jsonpathSymUnion) wildcard() jsonpath.Wildcard {
82+
return u.val.(jsonpath.Wildcard)
83+
}
84+
85+
// bool returns val as a bool.
func (u *jsonpathSymUnion) bool() bool {
86+
return u.val.(bool)
87+
}
88+
89+
%}
90+
91+
%union{
92+
id int32
93+
pos int32
94+
str string
95+
union jsonpathSymUnion
96+
}
97+
98+
/*
99+
* Basic non-keyword token types. These are hard-wired into the core lexer.
100+
* They must be listed first so that their numeric codes do not depend on
101+
* the set of keywords. Keep this list in sync with backend/parser/gram.y!
102+
*
103+
* Some of these are not directly referenced in this file, but they must be
104+
* here anyway.
105+
*/
106+
%token <str> IDENT UIDENT FCONST SCONST USCONST BCONST XCONST Op
107+
%token <*tree.NumVal> ICONST PARAM
108+
%token <str> TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
109+
%token <str> LESS_EQUALS GREATER_EQUALS NOT_EQUALS
110+
111+
%token <str> ERROR
112+
113+
%token <str> STRICT
114+
%token <str> LAX
115+
116+
%type <jsonpath.Expr> jsonpath
117+
%type <jsonpath.Expr> expr_or_predicate
118+
%type <jsonpath.Expr> expr
119+
%type <jsonpath.Expr> accessor_expr
120+
%type <jsonpath.Accessor> accessor_op
121+
%type <jsonpath.Accessor> path_primary
122+
%type <jsonpath.Accessor> key
123+
%type <str> key_name
124+
%type <jsonpath.Accessor> array_accessor
125+
%type <str> any_identifier
126+
%type <str> unreserved_keyword
127+
%type <bool> mode
128+
129+
%%
130+
131+
jsonpath:
132+
mode expr_or_predicate
133+
{
134+
jp := jsonpath.Jsonpath{Query: $2.query(), Strict: $1.bool()}
135+
jsonpathlex.(*lexer).SetJsonpath(jp)
136+
}
137+
;
138+
139+
mode:
140+
STRICT
141+
{
142+
$$.val = true
143+
}
144+
| LAX
145+
{
146+
$$.val = false
147+
}
148+
| /* empty */
149+
{
150+
$$.val = false
151+
}
152+
;
153+
154+
expr_or_predicate:
155+
expr
156+
{
157+
$$.val = $1.query()
158+
}
159+
;
160+
161+
expr:
162+
accessor_expr
163+
{
164+
$$.val = $1.query()
165+
}
166+
;
167+
168+
accessor_expr:
169+
path_primary
170+
{
171+
$$.val = jsonpath.Query{Accessors: []jsonpath.Accessor{$1.accessor()}}
172+
}
173+
| accessor_expr accessor_op
174+
{
175+
a := $1.query()
176+
a.Accessors = append(a.Accessors, $2.accessor())
177+
$$.val = a
178+
}
179+
;
180+
181+
path_primary:
182+
'$'
183+
{
184+
$$.val = jsonpath.Root{}
185+
}
186+
;
187+
188+
accessor_op:
189+
'.' key
190+
{
191+
$$.val = $2.key()
192+
}
193+
| array_accessor
194+
{
195+
$$.val = $1.wildcard()
196+
}
197+
;
198+
199+
key:
200+
key_name
201+
{
202+
$$.val = jsonpath.Key{Key: $1}
203+
}
204+
;
205+
206+
key_name:
207+
any_identifier
208+
{
209+
$$ = $1
210+
}
211+
;
212+
213+
array_accessor:
214+
'[' '*' ']'
215+
{
216+
$$.val = jsonpath.Wildcard{}
217+
}
218+
;
219+
220+
any_identifier:
221+
IDENT
222+
| unreserved_keyword
223+
;
224+
225+
unreserved_keyword:
226+
STRICT
227+
| LAX
228+
;
229+
230+
%%
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
keywords.go
2+
tokens.go
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
2+
3+
go_library(
4+
name = "lexbase",
5+
srcs = [
6+
"utils.go",
7+
":gen-keywords", # keep
8+
":gen-tokens", # keep
9+
],
10+
importpath = "github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser/lexbase",
11+
visibility = ["//visibility:public"],
12+
)
13+
14+
genrule(
15+
name = "gen-tokens",
16+
outs = ["tokens.go"],
17+
cmd = """
18+
(echo "// Code generated by make. DO NOT EDIT."; \
19+
echo "// GENERATED FILE DO NOT EDIT"; \
20+
echo; \
21+
echo "package lexbase"; \
22+
echo; \
23+
grep '^const [A-Z][_A-Z0-9]* ' $(location //pkg/util/jsonpath/parser:jsonpath-goyacc)) > $@
24+
""",
25+
tools = [
26+
"//pkg/util/jsonpath/parser:jsonpath-goyacc",
27+
],
28+
visibility = [
29+
":__pkg__",
30+
"//pkg/gen:__pkg__",
31+
],
32+
)
33+
34+
# Define the target to auto-generate our list of keywords from the grammar file.
35+
genrule(
36+
name = "gen-keywords",
37+
srcs = [
38+
"//pkg/util/jsonpath/parser:jsonpath.y",
39+
],
40+
outs = ["keywords.go"],
41+
cmd = """
42+
$(location //pkg/sql/lexbase/allkeywords) < $(location //pkg/util/jsonpath/parser:jsonpath.y) > $@
43+
""",
44+
tools = [
45+
"//pkg/sql/lexbase/allkeywords",
46+
],
47+
visibility = [
48+
":__pkg__",
49+
"//pkg/gen:__pkg__",
50+
],
51+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package lexbase

‎pkg/util/jsonpath/parser/lexer.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package parser
7+
8+
import (
9+
"strings"
10+
11+
"github.com/cockroachdb/cockroach/pkg/sql/parser"
12+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
13+
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
14+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath"
15+
"github.com/cockroachdb/errors"
16+
)
17+
18+
// lexer feeds pre-scanned tokens to the generated jsonpath parser and records
// the parse result or the last error reported by it.
type lexer struct {
19+
// in is the input text; it is used for the EOF position and is passed along
// when error details are populated.
in string
20+
// tokens contains tokens generated by the scanner.
21+
tokens []jsonpathSymType
22+
23+
// lastPos is the position into the tokens slice of the last
24+
// token returned by Lex().
// It is -1 before the first call to Lex().
25+
lastPos int
26+
27+
// expr is the parsed expression, stored by SetJsonpath.
expr *jsonpath.Jsonpath
28+
29+
// lastError is the most recent error recorded by Error.
lastError error
30+
31+
// parser is the parser passed to init.
parser jsonpathParser
32+
}
33+
34+
// init prepares the lexer for a new parse: it records the input text, the
// tokens to hand out and the parser, and clears any result or error left over
// from a previous use. lastPos starts at -1 because Lex increments it before
// reading a token.
func (l *lexer) init(sql string, tokens []jsonpathSymType, p jsonpathParser) {
35+
l.in = sql
36+
l.tokens = tokens
37+
l.lastPos = -1
38+
l.expr = nil
39+
l.lastError = nil
40+
l.parser = p
41+
}
42+
43+
// cleanup is used to avoid holding on to memory unnecessarily (for the cases
44+
// where the lexer is reused). It drops the token slice, the parsed expression
// and the last error; the input string and parser are left untouched.
45+
func (l *lexer) cleanup() {
46+
l.tokens = nil
47+
l.expr = nil
48+
l.lastError = nil
49+
}
50+
51+
// lastToken returns the token most recently handed out by Lex. It returns a
// zero token if Lex has not been called yet, and a synthetic "EOF" token
// positioned at the end of the input if Lex has run past the last token.
func (l *lexer) lastToken() jsonpathSymType {
52+
if l.lastPos < 0 {
53+
return jsonpathSymType{}
54+
}
55+
56+
if l.lastPos >= len(l.tokens) {
57+
return jsonpathSymType{
58+
id: 0,
59+
pos: int32(len(l.in)),
60+
str: "EOF",
61+
}
62+
}
63+
return l.tokens[l.lastPos]
64+
}
65+
66+
// Lex implements the jsonpathLexer interface.
// It advances to the next pre-scanned token, copies it into lval and returns
// its ID. Once the tokens are exhausted it fills lval with an "EOF" token at
// the end of the input and returns 0.
67+
func (l *lexer) Lex(lval *jsonpathSymType) int {
68+
l.lastPos++
69+
if l.lastPos >= len(l.tokens) {
70+
lval.id = 0
71+
lval.pos = int32(len(l.in))
72+
lval.str = "EOF"
73+
return 0
74+
}
75+
*lval = l.tokens[l.lastPos]
76+
return int(lval.id)
77+
}
78+
79+
// Error implements the jsonpathLexer interface.
// It turns the message into an error carrying the pgcode.Syntax candidate
// code, attaches details about the last token and the input via
// parser.PopulateErrorDetails, and stores the result in lastError. Only the
// most recent error is kept.
80+
func (l *lexer) Error(s string) {
81+
s = strings.TrimPrefix(s, "syntax error: ") // we'll add it again below.
82+
err := pgerror.WithCandidateCode(errors.Newf("%s", s), pgcode.Syntax)
83+
lastTok := l.lastToken()
84+
l.lastError = parser.PopulateErrorDetails(lastTok.id, lastTok.str, lastTok.pos, err, l.in)
85+
}
86+
87+
// SetJsonpath records the parsed expression. The top-level grammar rule calls
// it once the whole input has been reduced. The lexer keeps a pointer to its
// own copy of expr.
func (l *lexer) SetJsonpath(expr jsonpath.Jsonpath) {
88+
l.expr = &expr
89+
}

‎pkg/util/jsonpath/parser/parse.go

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package parser
7+
8+
import (
9+
"github.com/cockroachdb/cockroach/pkg/sql/parser/statements"
10+
"github.com/cockroachdb/cockroach/pkg/sql/scanner"
11+
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
12+
"github.com/cockroachdb/errors"
13+
)
14+
15+
// init installs the jsonpath validation hook on the tree package:
// tree.ValidateJSONPath parses its argument with Parse and returns the String()
// form of the resulting AST, or the parse error.
func init() {
16+
tree.ValidateJSONPath = func(jsonpath string) (string, error) {
17+
jp, err := Parse(jsonpath)
18+
if err != nil {
19+
return "", err
20+
}
21+
return jp.AST.String(), nil
22+
}
23+
}
24+
25+
// Parser bundles the jsonpath scanner, the lexer that feeds scanned tokens to
// the grammar, and the generated parser implementation. The zero value is
// ready to use (see the package-level Parse function).
type Parser struct {
26+
scanner scanner.JSONPathScanner
27+
lexer lexer
28+
parserImpl jsonpathParserImpl
29+
}
30+
31+
func (p *Parser) scan() (query string, tokens []jsonpathSymType, done bool) {
32+
var lval jsonpathSymType
33+
34+
p.scanner.Scan(&lval)
35+
if lval.id == 0 {
36+
return "", nil, true
37+
}
38+
39+
startPos := lval.pos
40+
41+
lval.pos = 0
42+
tokens = append(tokens, lval)
43+
var posBeforeScan int
44+
for {
45+
if lval.id == ERROR {
46+
return p.scanner.In()[startPos:], tokens, true
47+
}
48+
lval = jsonpathSymType{}
49+
posBeforeScan = p.scanner.Pos()
50+
p.scanner.Scan(&lval)
51+
if lval.id == 0 {
52+
return p.scanner.In()[startPos:posBeforeScan], tokens, (lval.id == 0)
53+
}
54+
lval.pos -= startPos
55+
tokens = append(tokens, lval)
56+
}
57+
}
58+
59+
// parse parses a statement from the given scanned tokens.
// It runs the generated parser over tokens and, on success, returns a
// statement holding the parsed expression and the query text. On failure it
// returns the error recorded by the lexer and an empty statement.
60+
func (p *Parser) parse(
61+
query string, tokens []jsonpathSymType,
62+
) (statements.JsonpathStatement, error) {
63+
p.lexer.init(query, tokens, &p.parserImpl)
64+
// cleanup runs after the return values below have been evaluated, so the
// returned AST pointer and error are not affected by it.
defer p.lexer.cleanup()
65+
if p.parserImpl.Parse(&p.lexer) != 0 {
66+
if p.lexer.lastError == nil {
67+
// This should never happen -- there should be an error object
68+
// every time Parse() returns nonzero. We're just playing safe
69+
// here.
70+
p.lexer.Error("syntax error")
71+
}
72+
err := p.lexer.lastError
73+
return statements.JsonpathStatement{}, err
74+
}
75+
return statements.JsonpathStatement{
76+
AST: p.lexer.expr,
77+
SQL: query,
78+
}, nil
79+
}
80+
81+
// Parse scans and parses a jsonpath string, returning the parsed statement or
// the parse error. The scanner is initialized with the input on entry and
// cleaned up on return.
func (p *Parser) Parse(jsonpath string) (statements.JsonpathStatement, error) {
82+
p.scanner.Init(jsonpath)
83+
defer p.scanner.Cleanup()
84+
85+
query, tokens, done := p.scan()
86+
stmt, err := p.parse(query, tokens)
87+
if err != nil {
88+
return statements.JsonpathStatement{}, err
89+
}
90+
// Defensive check: scan is expected to consume the whole input, so a false
// done flag indicates an internal error rather than bad user input.
if !done {
91+
return statements.JsonpathStatement{}, errors.AssertionFailedf("invalid jsonpath query: %s", jsonpath)
92+
}
93+
return stmt, nil
94+
}
95+
96+
// Parse parses a jsonpath string and returns a statements.JsonpathStatement
// whose AST is the parsed *jsonpath.Jsonpath. It uses a fresh Parser for each
// call.
97+
func Parse(jsonpath string) (statements.JsonpathStatement, error) {
98+
var p Parser
99+
return p.Parse(jsonpath)
100+
}
+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package parser_test
7+
8+
import (
9+
"bytes"
10+
"fmt"
11+
"testing"
12+
13+
"github.com/cockroachdb/cockroach/pkg/testutils/datapathutils"
14+
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
15+
"github.com/cockroachdb/cockroach/pkg/util/jsonpath/parser"
16+
"github.com/cockroachdb/datadriven"
17+
)
18+
19+
// TestParseDataDriven verifies that we can parse the supplied Jsonpath.
20+
//
21+
// The following commands are allowed:
22+
//
23+
// - parse
24+
//
25+
// Parses Jsonpath and expects success. The formatted AST is printed as
26+
// test output, marked " -- normalized!" when it differs from the input.
27+
//
28+
// - error
29+
//
30+
// Parses Jsonpath and expects an error. The error is printed as test
31+
// output.
32+
func TestParseDataDriven(t *testing.T) {
33+
datadriven.Walk(t, datapathutils.TestDataPath(t), func(t *testing.T, path string) {
34+
datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
35+
switch d.Cmd {
36+
case "parse":
37+
return VerifyParse(t, d.Input, d.Pos)
38+
case "error":
39+
_, err := parser.Parse(d.Input)
40+
if err == nil {
41+
d.Fatalf(t, "%s\nexpected error, found none", d.Pos)
42+
}
43+
return sqlutils.VerifyParseError(err)
44+
default:
45+
d.Fatalf(t, "%s\nunsupported command: %s", d.Pos, d.Cmd)
46+
}
47+
return ""
48+
})
49+
})
50+
}
51+
52+
// VerifyParse parses input and returns the formatted result followed by a
// newline. The test fails immediately, reporting pos, if parsing returns an
// error. When the formatted result differs from input, " -- normalized!" is
// appended before the newline.
func VerifyParse(t *testing.T, input, pos string) string {
53+
t.Helper()
54+
55+
jsonpath, err := parser.Parse(input)
56+
if err != nil {
57+
t.Fatalf("%s\nunexpected parse error: %v", pos, err)
58+
}
59+
60+
ref := jsonpath.String()
61+
note := ""
62+
if ref != input {
63+
note = " -- normalized!"
64+
}
65+
66+
var buf bytes.Buffer
67+
fmt.Fprintf(&buf, "%s%s\n", ref, note)
68+
return buf.String()
69+
}
+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# empty string
2+
error
3+
4+
----
5+
----
6+
at or near "EOF": syntax error
7+
DETAIL: source SQL:
8+
9+
^
10+
----
11+
----
12+
13+
parse
14+
$
15+
----
16+
$
17+
18+
parse
19+
$
20+
----
21+
$
22+
23+
parse
24+
$.abc
25+
----
26+
$.abc
27+
28+
parse
29+
$.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z
30+
----
31+
$.a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z
32+
33+
parse
34+
$ .a .b . c
35+
----
36+
$.a.b.c -- normalized!
37+
38+
parse
39+
$.a[*].b.c[*]
40+
----
41+
$.a[*].b.c[*]
42+
43+
parse
44+
$ . a [ * ] . bcd
45+
----
46+
$.a[*].bcd -- normalized!
47+
48+
error
49+
$.a[
50+
----
51+
at or near "EOF": syntax error
52+
DETAIL: source SQL:
53+
$.a[
54+
^
55+
56+
error
57+
$.a[]
58+
----
59+
at or near "]": syntax error
60+
DETAIL: source SQL:
61+
$.a[]
62+
^
63+
64+
parse
65+
strict $
66+
----
67+
strict $
68+
69+
parse
70+
lax $
71+
----
72+
$ -- normalized!
73+
74+
parse
75+
strict $.strict.lax
76+
----
77+
strict $.strict.lax
78+
79+
error
80+
strict lax $.strict.lax
81+
----
82+
at or near "lax": syntax error
83+
DETAIL: source SQL:
84+
strict lax $.strict.lax
85+
^
86+
87+
error
88+
$.$
89+
----
90+
at or near "$": syntax error
91+
DETAIL: source SQL:
92+
$.$
93+
^
94+
95+
error
96+
$.a$
97+
----
98+
at or near "$": syntax error
99+
DETAIL: source SQL:
100+
$.a$
101+
^
102+
103+
error
104+
$.a$a
105+
----
106+
at or near "$": syntax error
107+
DETAIL: source SQL:
108+
$.a$a
109+
^
110+
111+
parse
112+
$.a1
113+
----
114+
$.a1
115+
116+
error
117+
word $
118+
----
119+
at or near "word": syntax error
120+
DETAIL: source SQL:
121+
word $
122+
^
123+
124+
error
125+
$.1e
126+
----
127+
at or near "1": syntax error
128+
DETAIL: source SQL:
129+
$.1e
130+
^
131+
132+
# parse
133+
# $.1a
134+
# ----
135+
# $.1a

0 commit comments

Comments
 (0)
Please sign in to comment.