From b07172764078b0cab3970b22c6c2d210992edd1a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:29:58 -0500 Subject: [PATCH 1/2] test(snowflake): add failing test case for column names with spaces --- ibis/backends/snowflake/tests/test_client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py index 3f956b0de954..ae97259851cb 100644 --- a/ibis/backends/snowflake/tests/test_client.py +++ b/ibis/backends/snowflake/tests/test_client.py @@ -4,6 +4,8 @@ import os from collections import Counter +import hypothesis as h +import hypothesis.strategies as st import pandas as pd import pandas.testing as tm import pyarrow as pa @@ -436,3 +438,16 @@ def test_insert_dict_variants(con): con.insert(name, ibis.memtable(data)) assert len(t.execute()) == 4 + + +@h.given( + column_name=st.text( + st.characters(exclude_characters="\x00"), min_size=1, max_size=255 + ) +) +def test_fancy_column_names(con, column_name): + name = gen_name("test_fancy_column_names") + testdf = pd.DataFrame({column_name: [1, 2, 3]}) + t = con.create_table(name, obj=testdf, temp=True) + assert t.columns == (column_name,) + assert t.count().execute() == 3 From 46d6e50138a4786a3c200b25f5dc3d546e431ab2 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:30:36 -0500 Subject: [PATCH 2/2] fix(snowflake): use `get` instead of `get_path`; `get_path` does not support columns with spaces without complex quoting --- ibis/backends/snowflake/__init__.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 83adf6b7c10d..07eb67e975bd 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -1126,15 +1126,14 @@ def read_parquet( type_mapper = self.compiler.type_mapper + dialect = self.dialect stmts = [ f"CREATE TEMP STAGE {stage} FILE_FORMAT = (TYPE = PARQUET {options})", sge.Create( kind="TABLE", - this=sge.Schema( - this=qtable, expressions=schema.to_sqlglot(self.dialect) - ), + this=sge.Schema(this=qtable, expressions=schema.to_sqlglot(dialect)), properties=sge.Properties(expressions=[sge.TemporaryProperty()]), - ).sql(self.dialect), + ).sql(dialect), ] query = ";\n".join(stmts) @@ -1144,7 +1143,7 @@ def read_parquet( sg.select( *( sg.cast( - self.compiler.f.get_path(param, sge.convert(col)), + self.compiler.f.get(param, sge.convert(col)), type_mapper.from_ibis(typ), ) for col, typ in schema.items() @@ -1153,9 +1152,7 @@ def read_parquet( .from_(sge.Table(this=sge.Var(this=f"@{stage}"))) .subquery() ) - copy_query = sge.Copy(this=qtable, kind=True, files=[copy_select]).sql( - self.dialect - ) + copy_query = sge.Copy(this=qtable, kind=True, files=[copy_select]).sql(dialect) with self._safe_raw_sql(query) as cur: cur.execute(f"PUT 'file://{abspath}' @{stage} PARALLEL = {threads:d}") cur.execute(copy_query)