Skip to content

Commit

Permalink
feat: search current working directory for config file (#1464)
Browse files Browse the repository at this point in the history
Resolves #1333 

Adds the current working directory to the search path for the
`.pyiceberg.yaml` file.

As it is now, the file is searched in the following order:
1. the `PYICEBERG_HOME` environment variable
2. ~/
3. ./

I'm unsure if people would like to have 2 and 3 swapped. In either case,
users can still override this with the environment variable.

---------

Co-authored-by: Fokko Driesprong <[email protected]>
Co-authored-by: Kevin Liu <[email protected]>
  • Loading branch information
3 people authored Feb 12, 2025
1 parent d6dce6d commit 86b83e8
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 9 deletions.
2 changes: 1 addition & 1 deletion mkdocs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ catalog:

and loaded in python by calling `load_catalog(name="hive")` and `load_catalog(name="rest")`.

This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively) or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set).
This information must be placed inside a file called `.pyiceberg.yaml` located either in the `$HOME` or `%USERPROFILE%` directory (depending on whether the operating system is Unix-based or Windows-based, respectively), in the current working directory, or in the `$PYICEBERG_HOME` directory (if the corresponding environment variable is set).

For more details on possible configurations refer to the [specific page](https://py.iceberg.apache.org/configuration/).

Expand Down
4 changes: 2 additions & 2 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ hide:

There are three ways to pass in configuration:

- Using the `~/.pyiceberg.yaml` configuration file
- Using the `.pyiceberg.yaml` configuration file (Recommended)
- Through environment variables
- By passing in credentials through the CLI or the Python API

The configuration file is recommended since that's the easiest way to manage the credentials.
The configuration file can be stored in the directory specified by the `PYICEBERG_HOME` environment variable, in the home directory, or in the current working directory. These locations are searched in this order, and the first file found is used.

To change the path searched for the `.pyiceberg.yaml` file, set the `PYICEBERG_HOME` environment variable.

Expand Down
14 changes: 8 additions & 6 deletions pyiceberg/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,14 @@ def _load_yaml(directory: Optional[str]) -> Optional[RecursiveDict]:
return file_config_lowercase
return None

# Give priority to the PYICEBERG_HOME directory
if pyiceberg_home_config := _load_yaml(os.environ.get(PYICEBERG_HOME)):
return pyiceberg_home_config
# Look into the home directory
if pyiceberg_home_config := _load_yaml(os.path.expanduser("~")):
return pyiceberg_home_config
# Directories to search for the configuration file
# The current search order is: PYICEBERG_HOME, home directory, then current directory
search_dirs = [os.environ.get(PYICEBERG_HOME), os.path.expanduser("~"), os.getcwd()]

for directory in search_dirs:
if config := _load_yaml(directory):
return config

# Didn't find a config
return None

Expand Down
82 changes: 82 additions & 0 deletions tests/utils/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import os
from typing import Any, Dict, Optional
from unittest import mock

import pytest
Expand Down Expand Up @@ -93,3 +94,84 @@ def test_from_configuration_files_get_typed_value(tmp_path_factory: pytest.TempP

assert Config().get_bool("legacy-current-snapshot-id")
assert Config().get_int("max-workers") == 4


@pytest.mark.parametrize(
    "config_setup, expected_result",
    [
        # A file exists in every location: PYICEBERG_HOME wins
        (
            {
                "pyiceberg_home_content": "https://service.io/pyiceberg_home",
                "home_content": "https://service.io/user-home",
                "cwd_content": "https://service.io/cwd",
            },
            "https://service.io/pyiceberg_home",
        ),
        # Nothing in PYICEBERG_HOME: the home directory (~) is used next
        (
            {
                "pyiceberg_home_content": None,
                "home_content": "https://service.io/user-home",
                "cwd_content": "https://service.io/cwd",
            },
            "https://service.io/user-home",
        ),
        # Only the current working directory (.) holds a file: last fallback
        (
            {
                "pyiceberg_home_content": None,
                "home_content": None,
                "cwd_content": "https://service.io/cwd",
            },
            "https://service.io/cwd",
        ),
        # No file anywhere: the lookup yields nothing
        (
            {
                "pyiceberg_home_content": None,
                "home_content": None,
                "cwd_content": None,
            },
            None,
        ),
    ],
)
def test_config_lookup_order(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path_factory: pytest.TempPathFactory,
    config_setup: Dict[str, Any],
    expected_result: Optional[str],
) -> None:
    """
    Check the search order for `.pyiceberg.yaml`: PYICEBERG_HOME, then home (~), then cwd.

    Each parametrized case says which of the three directories should contain a
    configuration file (and with which catalog URI); the URI read back through
    `Config()._from_configuration_files()` must match `expected_result`, or the
    lookup must return a falsy value when `expected_result` is None.
    """

    def write_config(directory: str, uri: Optional[str]) -> None:
        # A falsy URI means "no configuration file in this directory".
        if not uri:
            return
        document = {"catalog": {"default": {"uri": uri}}}
        target = os.path.join(directory, ".pyiceberg.yaml")
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(as_document(document).as_yaml())

    # One temporary directory per search location, paired with the
    # config_setup key that describes its contents.
    pyiceberg_home_dir = str(tmp_path_factory.mktemp("pyiceberg_home"))
    user_home_dir = str(tmp_path_factory.mktemp("home"))
    working_dir = str(tmp_path_factory.mktemp("cwd"))
    layout = (
        (pyiceberg_home_dir, "pyiceberg_home_content"),
        (user_home_dir, "home_content"),
        (working_dir, "cwd_content"),
    )
    for directory, setup_key in layout:
        write_config(directory, config_setup.get(setup_key))

    # Redirect the environment variable, "~" expansion and the cwd to the
    # temporary directories created above.
    monkeypatch.setenv("PYICEBERG_HOME", pyiceberg_home_dir)
    monkeypatch.setattr(os.path, "expanduser", lambda _: user_home_dir)
    monkeypatch.chdir(working_dir)

    # Run the lookup and compare the resolved URI (if any) with the expectation.
    result = Config()._from_configuration_files()
    if result:
        actual_uri = result["catalog"]["default"]["uri"]  # type: ignore
    else:
        actual_uri = None
    assert actual_uri == expected_result, f"Unexpected configuration result. Expected: {expected_result}, Actual: {result}"

0 comments on commit 86b83e8

Please sign in to comment.