Skip to content

Commit cf8c552

Browse files
committed
feat: implement query checking
The change introduces a check_query callable which runs an extensible compose pipeline of query checkers. Note regarding QueryParseException: This custom exception is intended to be a thin wrapper around a pyparsing ParseException that RDFLib raises. This avoids introducing pyparsing as a dependency just to be able to test against this exception. I feel like RDFLib should not raise a pyparsing exception but provide a thin wrapper itself. See RDFLib/rdflib#3057. The check_query function runs in SPARQLModelAdapter to enable fast failures on inapplicable queries. Note that this somewhat couples QueryConstructor to SPARQLModelAdapter; QueryConstructor should be marked private for this reason. Closes #116. Closes #126.
1 parent b1ea529 commit cf8c552

File tree

5 files changed

+108
-2
lines changed

5 files changed

+108
-2
lines changed

rdfproxy/adapter.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from rdfproxy.mapper import _ModelBindingsMapper
1010
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
1111
from rdfproxy.utils._types import _TModelInstance
12+
from rdfproxy.utils.checkers.query_checker import check_query
1213
from rdfproxy.utils.models import Page, QueryParameters
1314

1415

@@ -40,7 +41,7 @@ def __init__(
4041
sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
4142
) -> None:
4243
self._target = target
43-
self._query = query
44+
self._query = check_query(query)
4445
self._model = model
4546

4647
self.sparql_strategy = sparql_strategy(self._target)

rdfproxy/constructor.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from rdfproxy.utils._types import _TModelInstance
2+
from rdfproxy.utils.checkers.query_checker import check_query
23
from rdfproxy.utils.models import QueryParameters
34
from rdfproxy.utils.sparql_utils import (
45
add_solution_modifier,

rdfproxy/utils/_exceptions.py

+15
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,18 @@ class InvalidGroupingKeyException(Exception):
1111

1212
class QueryConstructionException(Exception):
1313
"""Exception for indicating failed SPARQL query construction."""
14+
15+
16+
class UnsupportedQueryException(Exception):
    """Raised when a SPARQL query is of a kind that is not supported."""
18+
19+
20+
class QueryParseException(Exception):
    """Raised when parsing a SPARQL query fails.

    Acts as a thin wrapper: any exception raised while parsing a
    SPARQL query with RDFLib's parseQuery function is meant to be
    wrapped in and re-raised as this exception.

    parseQuery itself raises a pyparsing.exceptions.ParseException;
    testing against that exception directly would require adding
    pyparsing as a dependency solely for the tests.
    """

rdfproxy/utils/_types.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
"""Type definitions for rdfproxy."""
22

3+
from collections import UserString
34
from collections.abc import Iterable
4-
from typing import Protocol, TypeAlias, TypeVar, runtime_checkable
5+
from typing import Generic, Protocol, TypeAlias, TypeVar, runtime_checkable
56

67
from pydantic import BaseModel, ConfigDict as PydanticConfigDict
8+
from rdflib.plugins.sparql.parser import parseQuery
9+
from rdflib.plugins.sparql.parserutils import CompValue
10+
from rdfproxy.utils._exceptions import QueryParseException
711

812

913
_TModelInstance = TypeVar("_TModelInstance", bound=BaseModel)
@@ -49,3 +53,24 @@ class ConfigDict(PydanticConfigDict, total=False):
4953

5054
group_by: str
5155
model_bool: _TModelBoolValue
56+
57+
58+
# Type variable for query strings; bound to str so that str subclasses
# passed in are reflected in the annotated types.
_TQuery = TypeVar("_TQuery", bound=str)
59+
60+
61+
class ParsedSPARQL(Generic[_TQuery], UserString):
    """String-like container pairing a SPARQL query with its parse result.

    The query string is stored in `data` (the UserString payload) and the
    object obtained from parsing it is stored in `parse_object`.
    """

    def __init__(self, query: _TQuery) -> None:
        # Parsing happens eagerly, so constructing an instance from an
        # unparsable query raises QueryParseException.
        self.data: _TQuery = query
        self.parse_object: CompValue = self._get_parse_object(query)

    @staticmethod
    def _get_parse_object(query: str) -> CompValue:
        """Parse the query and return the second item of the parse result.

        Any exception raised by parseQuery is wrapped in a
        QueryParseException, with the original exception as its cause.
        """
        try:
            parse_result = parseQuery(query)
        except Exception as error:
            raise QueryParseException(error) from error

        # The parse result unpacks into exactly two items; only the
        # second one is kept.
        _, query_object = parse_result
        return query_object
+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
"""Functionality for performing checks on SPARQL queries."""
2+
3+
import logging
4+
5+
from rdfproxy.utils._exceptions import UnsupportedQueryException
6+
from rdfproxy.utils._types import ParsedSPARQL, _TQuery
7+
from rdfproxy.utils.utils import compose_left
8+
9+
10+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
11+
12+
13+
def _check_select_query(parsed_sparql: ParsedSPARQL) -> ParsedSPARQL:
    """Verify that the parsed SPARQL query is a SELECT query.

    Designed to be used as one step of the check_query pipeline: the
    input is handed back unchanged on success, and an
    UnsupportedQueryException is raised otherwise.
    """
    logger.debug("Running SELECT query check.")

    query_kind = parsed_sparql.parse_object.name
    if query_kind == "SelectQuery":
        return parsed_sparql

    raise UnsupportedQueryException("Only SELECT queries are applicable.")
23+
24+
25+
def _check_solution_modifiers(parsed_sparql: ParsedSPARQL) -> ParsedSPARQL:
    """Reject queries that carry a solution modifier on the outer query.

    Designed to be used as one step of the check_query pipeline: the
    input is handed back unchanged on success, and an
    UnsupportedQueryException is raised if a modifier is detected.
    """
    logger.debug("Running solution modifier check.")

    modifier_names = ("limitoffset", "groupby", "having", "orderby")

    # Lazily look up each modifier attribute and take the first one that
    # is not None; fall back to False when none is set.
    lookups = (getattr(parsed_sparql.parse_object, name) for name in modifier_names)
    modifier = next((value for value in lookups if value is not None), False)

    # NOTE(review): the gate below is a truthiness test, so a modifier
    # value that is present but falsy would pass the check - confirm
    # whether the parser can produce such values.
    if modifier:
        logger.critical("Detected solution modifier '%s' in outer query.", modifier)
        raise UnsupportedQueryException(
            "Solution modifiers for top-level queries are currently not supported."
        )

    return parsed_sparql
45+
46+
47+
def check_query(query: _TQuery) -> _TQuery:
    """Check a SPARQL query by running a compose pipeline of checks.

    The pipeline expects a SPARQL query string and
    will return that string if all checks pass.

    The query is first wrapped in a ParsedSPARQL object, which parses it
    and raises QueryParseException if parsing fails. That object is then
    passed through the checker components; each component returns the
    ParsedSPARQL object it received or raises UnsupportedQueryException.
    Finally, the query string is read back from the object's data attribute.

    New checks can be added as further components of the compose_left call.
    """
    logger.debug("Running query check pipeline on '%s'", query)
    # Parse once up front so that every checker works on the same parse object.
    parsed_sparql = ParsedSPARQL(query=query)

    # compose_left presumably applies the checkers in the listed order
    # (left to right) - confirm against rdfproxy.utils.utils.
    result: ParsedSPARQL = compose_left(
        _check_select_query,
        _check_solution_modifiers,
    )(parsed_sparql)

    return result.data

0 commit comments

Comments
 (0)