chore: disable memtable finalization in the spark backend
cpcloud committed Sep 23, 2024
1 parent 1b966e6 commit ce64ba6
Showing 2 changed files with 12 additions and 8 deletions.
15 changes: 7 additions & 8 deletions ibis/backends/pyspark/__init__.py
@@ -32,9 +32,15 @@
 
 try:
     from pyspark.errors import ParseException
+    from pyspark.errors.exceptions.connect import SparkConnectGrpcException
 except ImportError:
     from pyspark.sql.utils import ParseException
+
+    # Use a dummy class for when spark connect is not available
+    class SparkConnectGrpcException(Exception):
+        pass
+
 
 if TYPE_CHECKING:
     from collections.abc import Mapping, Sequence
     from urllib.parse import ParseResult
@@ -186,13 +192,6 @@ def do_connect(
         # Databricks Serverless compute only supports limited properties
         # and any attempt to set unsupported properties will result in an error.
         # https://docs.databricks.com/en/spark/conf.html
-        try:
-            from pyspark.errors.exceptions.connect import SparkConnectGrpcException
-        except ImportError:
-            # Use a dummy class for when spark connect is not available
-            class SparkConnectGrpcException(Exception):
-                pass
-
         with contextlib.suppress(SparkConnectGrpcException):
             self._session.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")

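Taken together, the two hunks above hoist the Spark Connect import to module scope, so the dummy-exception fallback is defined once instead of inside do_connect. A minimal standalone sketch of the pattern (the set_conf_best_effort helper is hypothetical, added here only for illustration):

import contextlib

try:
    # Present only when the Spark Connect client is installed
    from pyspark.errors.exceptions.connect import SparkConnectGrpcException
except ImportError:
    # Dummy class so suppress() below is a harmless no-op on classic
    # PySpark, where the gRPC exception can never be raised.
    class SparkConnectGrpcException(Exception):
        pass


def set_conf_best_effort(session, key: str, value: str) -> None:
    # Serverless/Connect clusters reject some properties; ignore that case.
    with contextlib.suppress(SparkConnectGrpcException):
        session.conf.set(key, value)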
@@ -456,7 +455,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         df.createTempView(op.name)
 
     def _finalize_memtable(self, name: str) -> None:
-        self.drop_view(name, force=True)
+        """No-op, otherwise a deadlock can occur when using Spark Connect."""
 
     @contextlib.contextmanager
     def _safe_raw_sql(self, query: str) -> Any:
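For context on the _finalize_memtable change: the backend registers a memtable as a Spark temp view (the createTempView call above) and later invokes _finalize_memtable to clean it up; issuing the drop_view from that cleanup path can deadlock under Spark Connect, so the body is now just a docstring (a bare string literal is a valid no-op function body in Python). A rough sketch of the lifecycle, where the weakref-based trigger is an assumption about how cleanup is scheduled rather than the actual ibis base-backend code:

import weakref


class LifecycleSketch:
    """Illustration only; not the real ibis backend class."""

    def _register_in_memory_table(self, op) -> None:
        # Simplified conversion; the real backend builds the DataFrame
        # from the memtable's in-memory data.
        df = self._session.createDataFrame(op.data.to_frame())
        df.createTempView(op.name)
        # Assumed cleanup hook: drop the view once the memtable op
        # is garbage collected.
        weakref.finalize(op, self._finalize_memtable, op.name)

    def _finalize_memtable(self, name: str) -> None:
        """No-op, otherwise a deadlock can occur when using Spark Connect."""
        # Previously: self.drop_view(name, force=True). Issuing that drop
        # from the finalizer is what could deadlock the gRPC client.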
5 changes: 5 additions & 0 deletions ibis/backends/tests/test_client.py
@@ -1686,6 +1686,11 @@ def test_insert_into_table_missing_columns(con, temp_table):
 @pytest.mark.notyet(
     ["bigquery"], raises=AssertionError, reason="test is flaky", strict=False
 )
+@pytest.mark.notyet(
+    ["pyspark"],
+    raises=AssertionError,
+    reason="likely, but not guaranteed deadlock when using spark connect",
+)
 def test_memtable_cleanup(con):
     name = ibis.util.gen_name("temp_memtable")
     t = ibis.memtable({"a": [1, 2, 3], "b": list("def")}, name=name)
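The new marker records that the cleanup test is expected to fail on pyspark: with finalization now a no-op, the temp view backing the memtable is never dropped. A hedged sketch of what the test exercises (the execution and assertion steps beyond the excerpt above are assumptions):

import gc

import ibis


def memtable_cleanup_sketch(con) -> None:
    name = ibis.util.gen_name("temp_memtable")
    t = ibis.memtable({"a": [1, 2, 3], "b": list("def")}, name=name)
    con.execute(t.count())  # first execution registers the temp view
    del t
    gc.collect()  # would normally trigger _finalize_memtable
    # On the pyspark backend the view is intentionally left in place,
    # so an assertion that `name` is gone now fails (AssertionError).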
