From 1157d89db49d780e1e600ac7be822f49c3bd45c9 Mon Sep 17 00:00:00 2001 From: Norman Chen Date: Mon, 17 Mar 2025 15:04:12 -0400 Subject: [PATCH 1/2] jsonpath: add jsonpath conditional evaluation This commit adds support for evaluating jsonpath conditionals. Conditionals have the structure of `$ ? (predicate)`, and will filter the current json objects based on whether they satisfy the predicate expression. Additionally, this commit introduces `@`, which represents the current json object so filters can be used to reference the object being evaluated. Epic: None Release note (sql change): Add jsonpath filters, which take on the form `$ ? (predicate)`, allowing results to be filtered. --- .../testdata/logic_test/jsonb_path_query | 92 ++++++++++++++++++- pkg/sql/scanner/jsonpath_scan.go | 3 + pkg/util/jsonpath/eval/BUILD.bazel | 1 + pkg/util/jsonpath/eval/eval.go | 24 +++-- pkg/util/jsonpath/eval/filter.go | 40 ++++++++ pkg/util/jsonpath/eval/operation.go | 8 +- pkg/util/jsonpath/expr.go | 16 ++++ pkg/util/jsonpath/parser/jsonpath.y | 10 ++ pkg/util/jsonpath/parser/testdata/jsonpath | 21 +++++ 9 files changed, 202 insertions(+), 13 deletions(-) create mode 100644 pkg/util/jsonpath/eval/filter.go diff --git a/pkg/sql/logictest/testdata/logic_test/jsonb_path_query b/pkg/sql/logictest/testdata/logic_test/jsonb_path_query index 60ca74e7cd78..d1a74f81ece5 100644 --- a/pkg/sql/logictest/testdata/logic_test/jsonb_path_query +++ b/pkg/sql/logictest/testdata/logic_test/jsonb_path_query @@ -108,7 +108,6 @@ SELECT jsonb_path_query(data, 'strict $.aa.aaa.aaaa') FROM a query empty SELECT jsonb_path_query('{}', '$.a') - statement ok CREATE TABLE b (j JSONB) @@ -196,7 +195,6 @@ SELECT jsonb_path_query('[1, 2, 3, 4, 5]', '$[1 to 3, 2, 1 to 3]'); query empty SELECT jsonb_path_query('[1, 2, 3, 4, 5]', '$[3 to 1]'); - query T rowsort SELECT jsonb_path_query('[1, 2, 3, 4, 5]', '$[4 to 4]'); ---- @@ -255,7 +253,6 @@ SELECT jsonb_path_query('{"a": [1, 2, 3, 4, 5]}', 'strict $[3 to 1]'); 
query empty SELECT jsonb_path_query('{"a": [1, 2, 3]}', '$.a.b'); - statement error pgcode 2203A jsonpath member accessor can only be applied to an object SELECT jsonb_path_query('{"a": [1, 2, 3]}', 'strict $.a.b'); @@ -613,6 +610,95 @@ SELECT jsonb_path_query('{"a": 5, "b": 10}', '(1.5 > 1.2 && (!($.a == 1) || $.b ---- true +query T rowsort +SELECT jsonb_path_query('{"a": [1,2,3]}', '$.a ? (1 == 1)'); +---- +1 +2 +3 + +query empty +SELECT jsonb_path_query('{"a": [1,2,3]}', '$.a ? (1 != 1)'); + +query T +SELECT jsonb_path_query('{"a": [1,2,3]}', 'strict $.a ? (1 == 1)'); +---- +[1, 2, 3] + +query empty +SELECT jsonb_path_query('{"a": [1,2,3]}', 'strict $.a ? (1 != 1)'); + +query T rowsort +SELECT jsonb_path_query('{"a": [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]}', '$.a[*] ? (@.b == 1)'); +---- +{"b": 1, "c": "hello"} +{"b": 1, "c": "!"} + +query empty +SELECT jsonb_path_query('{"a": [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]}', 'strict $.a ? (@.b == 1)'); + +query T rowsort +SELECT jsonb_path_query('{"a": [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]}', '$.a ? (@.b == 1)'); +---- +{"b": 1, "c": "hello"} +{"b": 1, "c": "!"} + +query T rowsort +SELECT jsonb_path_query('{"a": [[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}], [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]]}', '$.a ? (@.b == 1)'); +---- +[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}] +[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}] + +query T rowsort +SELECT jsonb_path_query('{"a": [[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}], [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]]}', '$.a[*] ? 
(@.b == 1)'); +---- +{"b": 1, "c": "hello"} +{"b": 1, "c": "!"} +{"b": 1, "c": "hello"} +{"b": 1, "c": "!"} + +query empty +SELECT jsonb_path_query('{"a": [[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}], [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]]}', 'strict $.a ? (@.b == 1)'); + +query empty +SELECT jsonb_path_query('{"a": [[{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}], [{"b": 1, "c": "hello"}, {"b": 2, "c": "world"}, {"b": 1, "c": "!"}]]}', 'strict $.a[*] ? (@.b == 1)'); + +query T rowsort +SELECT jsonb_path_query('{"a": [1,2,3,4,5]}', '$.a ? (@ > 3)'); +---- +4 +5 + +query T rowsort +SELECT jsonb_path_query('{"a": [{"b": 1, "c": 10}, {"b": 2, "c": 20}, {"b": 3, "c": 30}]}', '$.a ? (@.c > 15)'); +---- +{"b": 2, "c": 20} +{"b": 3, "c": 30} + +query T rowsort +SELECT jsonb_path_query('{"a": [{"b": "x", "c": true}, {"b": "y", "c": false}, {"b": "z", "c": true}]}', '$.a ? (@.c == true)'); +---- +{"b": "x", "c": true} +{"b": "z", "c": true} + +query T +SELECT jsonb_path_query('{"c": {"a": 1, "b":1}}', '$.c ? ($.c.a == @.b)'); +---- +{"a": 1, "b": 1} + +query empty +SELECT jsonb_path_query('{"a": [1,2,3]}', '$.a ? (@ > 10)'); + +query empty +SELECT jsonb_path_query('{"a": [{"b": 1, "c": 10}, {"b": 2, "c": 20}]}', '$.a ? (@.c > 100)'); + +# when string literals are supported +# query T rowsort +# SELECT jsonb_path_query('{"data": [{"val": "a", "num": 1}, {"val": "b", "num": 2}, {"val": "a", "num": 3}]}'::jsonb, '$.data ? 
(@.val == "a")'::jsonpath); +# ---- +# {"num": 1, "val": "a"} +# {"num": 3, "val": "a"} + # select jsonb_path_query('[1, 2, 3, 4, 5]', '$[-1]'); # select jsonb_path_query('[1, 2, 3, 4, 5]', 'strict $[-1]'); diff --git a/pkg/sql/scanner/jsonpath_scan.go b/pkg/sql/scanner/jsonpath_scan.go index db7612c7d1e9..c6ec7c2d0483 100644 --- a/pkg/sql/scanner/jsonpath_scan.go +++ b/pkg/sql/scanner/jsonpath_scan.go @@ -95,6 +95,9 @@ func (s *JSONPathScanner) Scan(lval ScanSymType) { return } return + case '@': + lval.SetID(lexbase.CURRENT) + return default: if sqllexbase.IsDigit(ch) { s.scanNumber(lval, ch) diff --git a/pkg/util/jsonpath/eval/BUILD.bazel b/pkg/util/jsonpath/eval/BUILD.bazel index ff039be895da..ddaf590ddc4f 100644 --- a/pkg/util/jsonpath/eval/BUILD.bazel +++ b/pkg/util/jsonpath/eval/BUILD.bazel @@ -5,6 +5,7 @@ go_library( srcs = [ "array.go", "eval.go", + "filter.go", "key.go", "operation.go", "scalar.go", diff --git a/pkg/util/jsonpath/eval/eval.go b/pkg/util/jsonpath/eval/eval.go index e8ba62102d0e..f87212c211f2 100644 --- a/pkg/util/jsonpath/eval/eval.go +++ b/pkg/util/jsonpath/eval/eval.go @@ -71,6 +71,8 @@ func (ctx *jsonpathCtx) eval(jp jsonpath.Path, current []json.JSON) ([]json.JSON return current, nil case jsonpath.Root: return []json.JSON{ctx.root}, nil + case jsonpath.Current: + return current, nil case jsonpath.Key: return ctx.evalKey(p, current) case jsonpath.Wildcard: @@ -85,11 +87,21 @@ func (ctx *jsonpathCtx) eval(jp jsonpath.Path, current []json.JSON) ([]json.JSON return []json.JSON{resolved}, nil case jsonpath.Operation: return ctx.evalOperation(p, current) + case jsonpath.Filter: + return ctx.evalFilter(p, current) default: return nil, errUnimplemented } } +func (ctx *jsonpathCtx) unwrap(input json.JSON) []json.JSON { + if !ctx.strict && input.Type() == json.ArrayJSONType { + array, _ := input.AsArray() + return array + } + return []json.JSON{input} +} + func (ctx *jsonpathCtx) evalAndUnwrap(path jsonpath.Path, inputs []json.JSON) 
([]json.JSON, error) { results, err := ctx.eval(path, inputs) if err != nil { @@ -100,12 +112,12 @@ func (ctx *jsonpathCtx) evalAndUnwrap(path jsonpath.Path, inputs []json.JSON) ([ } var unwrapped []json.JSON for _, result := range results { - if result.Type() == json.ArrayJSONType { - array, _ := result.AsArray() - unwrapped = append(unwrapped, array...) - } else { - unwrapped = append(unwrapped, result) - } + unwrapped = append(unwrapped, ctx.unwrap(result)...) } return unwrapped, nil } + +func (ctx *jsonpathCtx) unwrapAndEval(path jsonpath.Path, input json.JSON) ([]json.JSON, error) { + unwrapped := ctx.unwrap(input) + return ctx.eval(path, unwrapped) +} diff --git a/pkg/util/jsonpath/eval/filter.go b/pkg/util/jsonpath/eval/filter.go new file mode 100644 index 000000000000..1e1bb73df68a --- /dev/null +++ b/pkg/util/jsonpath/eval/filter.go @@ -0,0 +1,40 @@ +// Copyright 2025 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package eval + +import ( + "github.com/cockroachdb/cockroach/pkg/util/json" + "github.com/cockroachdb/cockroach/pkg/util/jsonpath" + "github.com/cockroachdb/errors" +) + +func (ctx *jsonpathCtx) evalFilter(p jsonpath.Filter, current []json.JSON) ([]json.JSON, error) { + // TODO(normanchenn): clean up. + var unwrapped []json.JSON + for _, j := range current { + unwrapped = append(unwrapped, ctx.unwrap(j)...) + } + current = unwrapped + var filtered []json.JSON + // unwrap before + for _, j := range current { + results, err := ctx.eval(p.Condition, []json.JSON{j}) + if err != nil { + // Postgres doesn't error when there's a structural error within + // filter condition, and will return nothing instead. 
+ return []json.JSON{}, nil //nolint:returnerrcheck + } + if len(results) != 1 || !isBool(results[0]) { + return nil, errors.New("filter condition must evaluate to a boolean") + } + + condition, _ := results[0].AsBool() + if condition { + filtered = append(filtered, j) + } + } + return filtered, nil +} diff --git a/pkg/util/jsonpath/eval/operation.go b/pkg/util/jsonpath/eval/operation.go index 3ab8ce82ca5f..3c5e32fdbc0b 100644 --- a/pkg/util/jsonpath/eval/operation.go +++ b/pkg/util/jsonpath/eval/operation.go @@ -53,11 +53,11 @@ func convertToBool(j json.JSON) jsonpathBool { } func (ctx *jsonpathCtx) evalOperation( - p jsonpath.Operation, current []json.JSON, + op jsonpath.Operation, current []json.JSON, ) ([]json.JSON, error) { - switch p.Type { + switch op.Type { case jsonpath.OpLogicalAnd, jsonpath.OpLogicalOr, jsonpath.OpLogicalNot: - res, err := ctx.evalLogical(p, current) + res, err := ctx.evalLogical(op, current) if err != nil { return nil, err } @@ -65,7 +65,7 @@ func (ctx *jsonpathCtx) evalOperation( case jsonpath.OpCompEqual, jsonpath.OpCompNotEqual, jsonpath.OpCompLess, jsonpath.OpCompLessEqual, jsonpath.OpCompGreater, jsonpath.OpCompGreaterEqual: - res, err := ctx.evalComparison(p, current) + res, err := ctx.evalComparison(op, current) if err != nil { return nil, err } diff --git a/pkg/util/jsonpath/expr.go b/pkg/util/jsonpath/expr.go index ab261e435c0c..737b2b31b4f6 100644 --- a/pkg/util/jsonpath/expr.go +++ b/pkg/util/jsonpath/expr.go @@ -92,3 +92,19 @@ func (a ArrayList) String() string { sb.WriteString("]") return sb.String() } + +type Filter struct { + Condition Path +} + +var _ Path = Filter{} + +func (f Filter) String() string { + return fmt.Sprintf("?(%s)", f.Condition) +} + +type Current struct{} + +var _ Path = Current{} + +func (c Current) String() string { return "@" } diff --git a/pkg/util/jsonpath/parser/jsonpath.y b/pkg/util/jsonpath/parser/jsonpath.y index 34579290f0e1..4de3bddacf69 100644 --- a/pkg/util/jsonpath/parser/jsonpath.y +++ 
b/pkg/util/jsonpath/parser/jsonpath.y @@ -169,6 +169,8 @@ func unaryOp(op jsonpath.OperationType, left jsonpath.Path) jsonpath.Operation { %token OR %token NOT +%token CURRENT + %type jsonpath %type expr_or_predicate %type expr @@ -251,6 +253,10 @@ path_primary: { $$.val = jsonpath.Root{} } +| CURRENT + { + $$.val = jsonpath.Current{} + } | scalar_value { $$.val = $1.path() @@ -267,6 +273,10 @@ accessor_op: { $$.val = $1.path() } +| '?' '(' predicate ')' + { + $$.val = jsonpath.Filter{Condition: $3.path()} + } ; key: diff --git a/pkg/util/jsonpath/parser/testdata/jsonpath b/pkg/util/jsonpath/parser/testdata/jsonpath index 0374fc6b146a..0c5cc4088d64 100644 --- a/pkg/util/jsonpath/parser/testdata/jsonpath +++ b/pkg/util/jsonpath/parser/testdata/jsonpath @@ -342,6 +342,27 @@ parse ---- ((1 == 1) || (1 != 1)) -- normalized! +parse +$.abc ? ($.a[1] > 2) +---- +$."abc"?(($."a"[1] > 2)) -- normalized! + +# TODO(normanchenn): this should not be allowed +parse +@ +---- +@ + +parse +$.a[*] ? (@.b > 100) +---- +$."a"[*]?((@."b" > 100)) -- normalized! + +parse +$.a[*] ? (@.b > 100 || (@.c < 100)) +---- +$."a"[*]?(((@."b" > 100) || (@."c" < 100))) -- normalized! + # postgres allows floats as array indexes # parse # $.abc[1.0] From 743d8bda5dc9e0b57f58ddfd4f7f2a3975ee2c02 Mon Sep 17 00:00:00 2001 From: Norman Chen Date: Wed, 19 Mar 2025 16:14:57 -0400 Subject: [PATCH 2/2] jsonpath/eval: add unwrap parameter to jsonpath.eval Previously, jsonpath conditionals/filters would unwrap infinitely. For example, `SELECT jsonb_path_query('{"a": [[[[{"b": 1}]]]]}', '$.a ? (@.b == 1)');` would return an entry, when postgres would not. This commit adds an unwrap boolean parameter to `eval`, which allows for control over when to stop unwrapping json arrays. Additionally, this commit updates `eval` to take in a `json.JSON` rather than a `[]json.JSON`, simplifying the evaluation logic. 
Epic: None Release note: None --- .../testdata/logic_test/jsonb_path_query | 9 ++ pkg/util/jsonpath/eval/array.go | 108 ++++++--------- pkg/util/jsonpath/eval/eval.go | 131 +++++++++++++----- pkg/util/jsonpath/eval/filter.go | 40 ++---- pkg/util/jsonpath/eval/key.go | 49 +++---- pkg/util/jsonpath/eval/operation.go | 37 ++--- pkg/util/jsonpath/eval/scalar.go | 10 +- 7 files changed, 207 insertions(+), 177 deletions(-) diff --git a/pkg/sql/logictest/testdata/logic_test/jsonb_path_query b/pkg/sql/logictest/testdata/logic_test/jsonb_path_query index d1a74f81ece5..a0c9faf0bef5 100644 --- a/pkg/sql/logictest/testdata/logic_test/jsonb_path_query +++ b/pkg/sql/logictest/testdata/logic_test/jsonb_path_query @@ -692,6 +692,15 @@ SELECT jsonb_path_query('{"a": [1,2,3]}', '$.a ? (@ > 10)'); query empty SELECT jsonb_path_query('{"a": [{"b": 1, "c": 10}, {"b": 2, "c": 20}]}', '$.a ? (@.c > 100)'); +query empty +SELECT jsonb_path_query('{"a": [[[{"b": 1}], [{"b": 2}]], [[{"b": 2}], [{"b": 1}]]]}', '$.a ? (@.b == 1)'); + +query empty +SELECT jsonb_path_query('{"a": [[[[[[{"b": 1}]]]]]]}', '$.a ? (@.b == 1)'); + +query empty +SELECT jsonb_path_query('{"a": [[[{"b": 1}], [{"b": 2}]]]}', '$.a ? (@.b == 1)'); + # when string literals are supported # query T rowsort # SELECT jsonb_path_query('{"data": [{"val": "a", "num": 1}, {"val": "b", "num": 2}, {"val": "a", "num": 3}]}'::jsonb, '$.data ? 
(@.val == "a")'::jsonpath); diff --git a/pkg/util/jsonpath/eval/array.go b/pkg/util/jsonpath/eval/array.go index 98d1438b307c..7d0d2f545238 100644 --- a/pkg/util/jsonpath/eval/array.go +++ b/pkg/util/jsonpath/eval/array.go @@ -13,97 +13,77 @@ import ( "github.com/cockroachdb/errors" ) -func (ctx *jsonpathCtx) evalArrayWildcard(current []json.JSON) ([]json.JSON, error) { - var agg []json.JSON - for _, j := range current { - if j.Type() == json.ArrayJSONType { - paths, err := json.AllPathsWithDepth(j, 1) - if err != nil { - return nil, err - } - for _, path := range paths { - if path.Len() != 1 { - return nil, errors.AssertionFailedf("unexpected path length") - } - unwrapped, err := path.FetchValIdx(0) - if err != nil { - return nil, err - } - if unwrapped == nil { - return nil, errors.AssertionFailedf("unwrapping json element") - } - agg = append(agg, unwrapped) - } - } else if !ctx.strict { - agg = append(agg, j) - } else { - return nil, pgerror.Newf(pgcode.SQLJSONArrayNotFound, "jsonpath wildcard array accessor can only be applied to an array") - } +func (ctx *jsonpathCtx) evalArrayWildcard(jsonValue json.JSON) ([]json.JSON, error) { + if jsonValue.Type() == json.ArrayJSONType { + // Do not evaluate any paths, just unwrap the current target. 
+ return ctx.unwrapCurrentTargetAndEval(nil /* jsonPath */, jsonValue, !ctx.strict /* unwrapNext */) + } else if !ctx.strict { + return []json.JSON{jsonValue}, nil + } else { + return nil, pgerror.Newf(pgcode.SQLJSONArrayNotFound, "jsonpath wildcard array accessor can only be applied to an array") } - return agg, nil } func (ctx *jsonpathCtx) evalArrayList( - a jsonpath.ArrayList, current []json.JSON, + arrayList jsonpath.ArrayList, jsonValue json.JSON, ) ([]json.JSON, error) { + if ctx.strict && jsonValue.Type() != json.ArrayJSONType { + return nil, pgerror.Newf(pgcode.SQLJSONArrayNotFound, "jsonpath array accessor can only be applied to an array") + } var agg []json.JSON - for _, path := range a { + for _, idxAccessor := range arrayList { var from, to int var err error - if idxRange, ok := path.(jsonpath.ArrayIndexRange); ok { - from, err = ctx.resolveArrayIndex(idxRange.Start, current) + if idxRange, ok := idxAccessor.(jsonpath.ArrayIndexRange); ok { + from, err = ctx.resolveArrayIndex(idxRange.Start, jsonValue) if err != nil { return nil, err } - to, err = ctx.resolveArrayIndex(idxRange.End, current) + to, err = ctx.resolveArrayIndex(idxRange.End, jsonValue) if err != nil { return nil, err } } else { - from, err = ctx.resolveArrayIndex(path, current) + from, err = ctx.resolveArrayIndex(idxAccessor, jsonValue) if err != nil { return nil, err } to = from } - for _, j := range current { - if ctx.strict && j.Type() != json.ArrayJSONType { - return nil, pgerror.Newf(pgcode.SQLJSONArrayNotFound, - "jsonpath array accessor can only be applied to an array") - } - length := j.Len() - if j.Type() != json.ArrayJSONType { - length = 1 - } - if ctx.strict && (from < 0 || from > to || to >= length) { - return nil, pgerror.Newf(pgcode.InvalidSQLJSONSubscript, - "jsonpath array subscript is out of bounds") + length := jsonValue.Len() + if jsonValue.Type() != json.ArrayJSONType { + length = 1 + } + if ctx.strict && (from < 0 || from > to || to >= length) { + return nil, 
pgerror.Newf(pgcode.InvalidSQLJSONSubscript, + "jsonpath array subscript is out of bounds") + } + for i := max(from, 0); i <= min(to, length-1); i++ { + v, err := jsonArrayValueAtIndex(ctx, jsonValue, i) + if err != nil { + return nil, err } - for i := max(from, 0); i <= min(to, length-1); i++ { - v, err := jsonArrayValueAtIndex(ctx, j, i) - if err != nil { - return nil, err - } - if v == nil { - continue - } - agg = append(agg, v) + if v == nil { + continue } + agg = append(agg, v) } } return agg, nil } -func (ctx *jsonpathCtx) resolveArrayIndex(p jsonpath.Path, current []json.JSON) (int, error) { - results, err := ctx.eval(p, current) +func (ctx *jsonpathCtx) resolveArrayIndex( + jsonPath jsonpath.Path, jsonValue json.JSON, +) (int, error) { + evalResults, err := ctx.eval(jsonPath, jsonValue, !ctx.strict /* unwrap */) if err != nil { return 0, err } - if len(results) != 1 || results[0].Type() != json.NumberJSONType { + if len(evalResults) != 1 || evalResults[0].Type() != json.NumberJSONType { return -1, pgerror.Newf(pgcode.InvalidSQLJSONSubscript, "jsonpath array subscript is not a single numeric value") } - i, err := asInt(results[0]) + i, err := asInt(evalResults[0]) if err != nil { return -1, pgerror.Newf(pgcode.InvalidSQLJSONSubscript, "jsonpath array subscript is not a single numeric value") } @@ -122,22 +102,22 @@ func asInt(j json.JSON) (int, error) { return int(i64), nil } -func jsonArrayValueAtIndex(ctx *jsonpathCtx, j json.JSON, index int) (json.JSON, error) { - if ctx.strict && j.Type() != json.ArrayJSONType { +func jsonArrayValueAtIndex(ctx *jsonpathCtx, jsonValue json.JSON, index int) (json.JSON, error) { + if ctx.strict && jsonValue.Type() != json.ArrayJSONType { return nil, pgerror.Newf(pgcode.SQLJSONArrayNotFound, "jsonpath array accessor can only be applied to an array") - } else if j.Type() != json.ArrayJSONType { + } else if jsonValue.Type() != json.ArrayJSONType { if index == 0 { - return j, nil + return jsonValue, nil } return nil, nil } - if 
ctx.strict && index >= j.Len() { + if ctx.strict && index >= jsonValue.Len() { return nil, pgerror.Newf(pgcode.InvalidSQLJSONSubscript, "jsonpath array subscript is out of bounds") } if index < 0 { // Shouldn't happen, not supported in parser. return nil, errors.AssertionFailedf("negative array index") } - return j.FetchValIdx(index) + return jsonValue.FetchValIdx(index) } diff --git a/pkg/util/jsonpath/eval/eval.go b/pkg/util/jsonpath/eval/eval.go index f87212c211f2..440e87cf0d0f 100644 --- a/pkg/util/jsonpath/eval/eval.go +++ b/pkg/util/jsonpath/eval/eval.go @@ -39,13 +39,12 @@ func JsonpathQuery( vars: vars.JSON, strict: expr.Strict, } - // When silent is true, overwrite the strict mode. if bool(silent) { ctx.strict = false } - j, err := ctx.eval(expr.Path, []json.JSON{ctx.root}) + j, err := ctx.eval(expr.Path, ctx.root, !ctx.strict /* unwrap */) if err != nil { return nil, err } @@ -56,68 +55,132 @@ func JsonpathQuery( return res, nil } -func (ctx *jsonpathCtx) eval(jp jsonpath.Path, current []json.JSON) ([]json.JSON, error) { - switch p := jp.(type) { +func (ctx *jsonpathCtx) eval( + jsonPath jsonpath.Path, jsonValue json.JSON, unwrap bool, +) ([]json.JSON, error) { + switch path := jsonPath.(type) { case jsonpath.Paths: - // Evaluate each path within the path list, update the current JSON - // object after each evaluation. 
- for _, path := range p { - results, err := ctx.eval(path, current) + results := []json.JSON{jsonValue} + var err error + for _, p := range path { + results, err = ctx.evalArray(p, results, unwrap) if err != nil { return nil, err } - current = results } - return current, nil + return results, nil case jsonpath.Root: return []json.JSON{ctx.root}, nil case jsonpath.Current: - return current, nil + return []json.JSON{jsonValue}, nil case jsonpath.Key: - return ctx.evalKey(p, current) + return ctx.evalKey(path, jsonValue, unwrap) case jsonpath.Wildcard: - return ctx.evalArrayWildcard(current) + return ctx.evalArrayWildcard(jsonValue) case jsonpath.ArrayList: - return ctx.evalArrayList(p, current) + return ctx.evalArrayList(path, jsonValue) case jsonpath.Scalar: - resolved, err := ctx.resolveScalar(p) + resolved, err := ctx.resolveScalar(path) if err != nil { return nil, err } return []json.JSON{resolved}, nil case jsonpath.Operation: - return ctx.evalOperation(p, current) + res, err := ctx.evalOperation(path, jsonValue) + if err != nil { + return nil, err + } + return convertFromBool(res), nil case jsonpath.Filter: - return ctx.evalFilter(p, current) + return ctx.evalFilter(path, jsonValue, unwrap) default: return nil, errUnimplemented } } -func (ctx *jsonpathCtx) unwrap(input json.JSON) []json.JSON { - if !ctx.strict && input.Type() == json.ArrayJSONType { - array, _ := input.AsArray() - return array +func (ctx *jsonpathCtx) evalArray( + jsonPath jsonpath.Path, jsonValue []json.JSON, unwrap bool, +) ([]json.JSON, error) { + var agg []json.JSON + for _, j := range jsonValue { + arr, err := ctx.eval(jsonPath, j, unwrap) + if err != nil { + return nil, err + } + agg = append(agg, arr...) + } + return agg, nil +} + +// unwrapCurrentTargetAndEval is used to unwrap the current json array and evaluate +// the jsonpath query on each element. It is similar to executeItemUnwrapTargetArray +// in postgres/src/backend/utils/adt/jsonpath_exec.c. 
+func (ctx *jsonpathCtx) unwrapCurrentTargetAndEval( + jsonPath jsonpath.Path, jsonValue json.JSON, unwrapNext bool, +) ([]json.JSON, error) { + if jsonValue.Type() != json.ArrayJSONType { + return nil, errors.AssertionFailedf("unwrapCurrentTargetAndEval can only be applied to an array") } - return []json.JSON{input} + return ctx.executeAnyItem(jsonPath, jsonValue, unwrapNext) } -func (ctx *jsonpathCtx) evalAndUnwrap(path jsonpath.Path, inputs []json.JSON) ([]json.JSON, error) { - results, err := ctx.eval(path, inputs) +func (ctx *jsonpathCtx) executeAnyItem( + jsonPath jsonpath.Path, jsonValue json.JSON, unwrapNext bool, +) ([]json.JSON, error) { + childItems, err := json.AllPathsWithDepth(jsonValue, 1 /* depth */) if err != nil { return nil, err } - if ctx.strict { - return results, nil - } - var unwrapped []json.JSON - for _, result := range results { - unwrapped = append(unwrapped, ctx.unwrap(result)...) + var agg []json.JSON + for _, item := range childItems { + if item.Len() != 1 { + return nil, errors.AssertionFailedf("unexpected path length") + } + unwrappedItem, err := item.FetchValIdx(0 /* idx */) + if err != nil { + return nil, err + } + if unwrappedItem == nil { + return nil, errors.AssertionFailedf("unwrapping json element") + } + if jsonPath == nil { + agg = append(agg, unwrappedItem) + } else { + evalResults, err := ctx.eval(jsonPath, unwrappedItem, unwrapNext) + if err != nil { + return nil, err + } + agg = append(agg, evalResults...) + } } - return unwrapped, nil + return agg, nil } -func (ctx *jsonpathCtx) unwrapAndEval(path jsonpath.Path, input json.JSON) ([]json.JSON, error) { - unwrapped := ctx.unwrap(input) - return ctx.eval(path, unwrapped) +// evalAndUnwrapResult is used to evaluate the jsonpath query and unwrap the result +// if the unwrap flag is true. It is similar to executeItemOptUnwrapResult +// in postgres/src/backend/utils/adt/jsonpath_exec.c. 
+func (ctx *jsonpathCtx) evalAndUnwrapResult( + jsonPath jsonpath.Path, jsonValue json.JSON, unwrap bool, +) ([]json.JSON, error) { + evalResults, err := ctx.eval(jsonPath, jsonValue, !ctx.strict /* unwrap */) + if err != nil { + return nil, err + } + if unwrap && !ctx.strict { + var agg []json.JSON + for _, j := range evalResults { + if j.Type() == json.ArrayJSONType { + // Pass in nil to just unwrap the array. + arr, err := ctx.unwrapCurrentTargetAndEval(nil /* jsonPath */, j, false /* unwrapNext */) + if err != nil { + return nil, err + } + agg = append(agg, arr...) + } else { + agg = append(agg, j) + } + } + return agg, nil + } + return evalResults, nil } diff --git a/pkg/util/jsonpath/eval/filter.go b/pkg/util/jsonpath/eval/filter.go index 1e1bb73df68a..4194d3c4fd23 100644 --- a/pkg/util/jsonpath/eval/filter.go +++ b/pkg/util/jsonpath/eval/filter.go @@ -8,33 +8,23 @@ package eval import ( "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/jsonpath" - "github.com/cockroachdb/errors" ) -func (ctx *jsonpathCtx) evalFilter(p jsonpath.Filter, current []json.JSON) ([]json.JSON, error) { - // TODO(normanchenn): clean up. - var unwrapped []json.JSON - for _, j := range current { - unwrapped = append(unwrapped, ctx.unwrap(j)...) +func (ctx *jsonpathCtx) evalFilter( + p jsonpath.Filter, current json.JSON, unwrap bool, +) ([]json.JSON, error) { + if unwrap && current.Type() == json.ArrayJSONType { + return ctx.unwrapCurrentTargetAndEval(p, current, false /* unwrapNext */) } - current = unwrapped - var filtered []json.JSON - // unwrap before - for _, j := range current { - results, err := ctx.eval(p.Condition, []json.JSON{j}) - if err != nil { - // Postgres doesn't error when there's a structural error within - // filter condition, and will return nothing instead. 
- return []json.JSON{}, nil //nolint:returnerrcheck - } - if len(results) != 1 || !isBool(results[0]) { - return nil, errors.New("filter condition must evaluate to a boolean") - } - - condition, _ := results[0].AsBool() - if condition { - filtered = append(filtered, j) - } + results, err := ctx.eval(p.Condition, current, !ctx.strict /* unwrap */) + if err != nil || len(results) != 1 || !isBool(results[0]) { + // Postgres doesn't error when there's a structural error within filter + // conditions, and will return nothing instead. + return []json.JSON{}, nil //nolint:returnerrcheck + } + condition, _ := results[0].AsBool() + if condition { + return []json.JSON{current}, nil } - return filtered, nil + return []json.JSON{}, nil } diff --git a/pkg/util/jsonpath/eval/key.go b/pkg/util/jsonpath/eval/key.go index ef6b8e5e3e5f..3fa5d95a2a0f 100644 --- a/pkg/util/jsonpath/eval/key.go +++ b/pkg/util/jsonpath/eval/key.go @@ -10,39 +10,26 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/util/json" "github.com/cockroachdb/cockroach/pkg/util/jsonpath" - "github.com/cockroachdb/errors" ) -func (ctx *jsonpathCtx) evalKey(k jsonpath.Key, current []json.JSON) ([]json.JSON, error) { - var agg []json.JSON - for _, j := range current { - if j.Type() == json.ObjectJSONType { - val, err := j.FetchValKey(string(k)) - if err != nil { - return nil, err - } - if val == nil { - if ctx.strict { - return nil, pgerror.Newf(pgcode.SQLJSONMemberNotFound, "JSON object does not contain key %q", string(k)) - } - continue - } - agg = append(agg, val) - } else if !ctx.strict && j.Type() == json.ArrayJSONType { - arr, ok := j.AsArray() - if !ok { - return nil, errors.AssertionFailedf("array expected") - } - for _, elem := range arr { - results, err := ctx.eval(k, []json.JSON{elem}) - if err != nil { - return nil, err - } - agg = append(agg, results...) 
- } - } else if ctx.strict { - return nil, pgerror.Newf(pgcode.SQLJSONMemberNotFound, "jsonpath member accessor can only be applied to an object") +func (ctx *jsonpathCtx) evalKey( + key jsonpath.Key, jsonValue json.JSON, unwrap bool, +) ([]json.JSON, error) { + if jsonValue.Type() == json.ObjectJSONType { + val, err := jsonValue.FetchValKey(string(key)) + if err != nil { + return nil, err } + if val == nil && ctx.strict { + return nil, pgerror.Newf(pgcode.SQLJSONMemberNotFound, "JSON object does not contain key %q", string(key)) + } else if val != nil { + return []json.JSON{val}, nil + } + return []json.JSON{}, nil + } else if unwrap && jsonValue.Type() == json.ArrayJSONType { + return ctx.unwrapCurrentTargetAndEval(key, jsonValue, false /* unwrapNext */) + } else if ctx.strict { + return nil, pgerror.Newf(pgcode.SQLJSONMemberNotFound, "jsonpath member accessor can only be applied to an object") } - return agg, nil + return []json.JSON{}, nil } diff --git a/pkg/util/jsonpath/eval/operation.go b/pkg/util/jsonpath/eval/operation.go index 3c5e32fdbc0b..dad0d64e5b3b 100644 --- a/pkg/util/jsonpath/eval/operation.go +++ b/pkg/util/jsonpath/eval/operation.go @@ -53,36 +53,35 @@ func convertToBool(j json.JSON) jsonpathBool { } func (ctx *jsonpathCtx) evalOperation( - op jsonpath.Operation, current []json.JSON, -) ([]json.JSON, error) { + op jsonpath.Operation, jsonValue json.JSON, +) (jsonpathBool, error) { switch op.Type { case jsonpath.OpLogicalAnd, jsonpath.OpLogicalOr, jsonpath.OpLogicalNot: - res, err := ctx.evalLogical(op, current) + res, err := ctx.evalLogical(op, jsonValue) if err != nil { - return nil, err + return jsonpathBoolUnknown, err } - return convertFromBool(res), nil + return res, nil case jsonpath.OpCompEqual, jsonpath.OpCompNotEqual, jsonpath.OpCompLess, jsonpath.OpCompLessEqual, jsonpath.OpCompGreater, jsonpath.OpCompGreaterEqual: - res, err := ctx.evalComparison(op, current) + res, err := ctx.evalComparison(op, jsonValue, true /* unwrapRight */) if 
err != nil { - return nil, err + return jsonpathBoolUnknown, err } - return convertFromBool(res), nil + return res, nil default: panic(errors.AssertionFailedf("unhandled operation type")) } } func (ctx *jsonpathCtx) evalLogical( - op jsonpath.Operation, current []json.JSON, + op jsonpath.Operation, current json.JSON, ) (jsonpathBool, error) { - left, err := ctx.eval(op.Left, current) + left, err := ctx.eval(op.Left, current, !ctx.strict /* unwrap */) if err != nil { return jsonpathBoolUnknown, err } - if len(left) != 1 || !isBool(left[0]) { return jsonpathBoolUnknown, errors.AssertionFailedf("left is not a boolean") } @@ -108,16 +107,14 @@ func (ctx *jsonpathCtx) evalLogical( panic(errors.AssertionFailedf("unhandled logical operation type")) } - right, err := ctx.eval(op.Right, current) + right, err := ctx.eval(op.Right, current, !ctx.strict /* unwrap */) if err != nil { return jsonpathBoolUnknown, err } - if len(right) != 1 || !isBool(right[0]) { return jsonpathBoolUnknown, errors.AssertionFailedf("right is not a boolean") } rightBool := convertToBool(right[0]) - switch op.Type { case jsonpath.OpLogicalAnd: if rightBool == jsonpathBoolTrue { @@ -139,13 +136,17 @@ func (ctx *jsonpathCtx) evalLogical( // right paths satisfy the condition. In strict mode, even if a pair has been // found, all pairs need to be checked for errors. func (ctx *jsonpathCtx) evalComparison( - p jsonpath.Operation, current []json.JSON, + op jsonpath.Operation, jsonValue json.JSON, unwrapRight bool, ) (jsonpathBool, error) { - left, err := ctx.evalAndUnwrap(p.Left, current) + // The left argument results are always auto-unwrapped. + left, err := ctx.evalAndUnwrapResult(op.Left, jsonValue, true /* unwrap */) if err != nil { return jsonpathBoolUnknown, err } - right, err := ctx.evalAndUnwrap(p.Right, current) + // The right argument results are conditionally unwrapped. 
Currently, it is + // always unwrapped, but in the future for operations like like_regex, we + // don't want to unwrap the right argument. + right, err := ctx.evalAndUnwrapResult(op.Right, jsonValue, unwrapRight) if err != nil { return jsonpathBoolUnknown, err } @@ -154,7 +155,7 @@ func (ctx *jsonpathCtx) evalComparison( found := false for _, l := range left { for _, r := range right { - res, err := execComparison(l, r, p.Type) + res, err := execComparison(l, r, op.Type) if err != nil { return jsonpathBoolUnknown, err } diff --git a/pkg/util/jsonpath/eval/scalar.go b/pkg/util/jsonpath/eval/scalar.go index 5571fa90dacb..5e95383bb2fe 100644 --- a/pkg/util/jsonpath/eval/scalar.go +++ b/pkg/util/jsonpath/eval/scalar.go @@ -12,16 +12,16 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/jsonpath" ) -func (ctx *jsonpathCtx) resolveScalar(s jsonpath.Scalar) (json.JSON, error) { - if s.Type == jsonpath.ScalarVariable { - val, err := ctx.vars.FetchValKey(s.Variable) +func (ctx *jsonpathCtx) resolveScalar(scalar jsonpath.Scalar) (json.JSON, error) { + if scalar.Type == jsonpath.ScalarVariable { + val, err := ctx.vars.FetchValKey(scalar.Variable) if err != nil { return nil, err } if val == nil { - return nil, pgerror.Newf(pgcode.UndefinedObject, "could not find jsonpath variable %q", s.Variable) + return nil, pgerror.Newf(pgcode.UndefinedObject, "could not find jsonpath variable %q", scalar.Variable) } return val, nil } - return s.Value, nil + return scalar.Value, nil }