Skip to content

Commit

Permalink
Merge pull request #1 from eriknw/second_commit
Browse files Browse the repository at this point in the history
First PR
  • Loading branch information
dschult authored Aug 21, 2024
2 parents 59aff1f + 6e56e9f commit 0731442
Show file tree
Hide file tree
Showing 19 changed files with 993 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
.ruff_cache/

# Translations
*.mo
Expand Down Expand Up @@ -82,6 +83,9 @@ target/
profile_default/
ipython_config.py

# Vim's swap files
*.sw[op]

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
Expand Down
49 changes: 49 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# https://pre-commit.com/
#
# Before first use: `pre-commit install`
# To run manually: `pre-commit run --all-files`
# To update: `pre-commit autoupdate`
fail_fast: false
default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
- id: check-ast
- id: check-toml
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
exclude_types: [svg]
- id: mixed-line-ending
- id: trailing-whitespace
- repo: https://github.com/asottile/pyupgrade
rev: v3.16.0
hooks:
- id: pyupgrade
args: [--py310-plus]
- repo: https://github.com/psf/black
rev: 24.4.2
hooks:
- id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.1
hooks:
- id: ruff
args:
- --fix
# - id: ruff-format # Prefer black for now
- repo: https://github.com/abravalheri/validate-pyproject
rev: v0.18
hooks:
- id: validate-pyproject
name: Validate pyproject.toml
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
hooks:
- id: no-commit-to-branch # no commit directly to main
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2024, NetworkX
Copyright (c) 2024, NetworkX Developers, NVIDIA CORPORATION, and nx-pandas contributors

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# nx-pandas
# nx-pandas
14 changes: 14 additions & 0 deletions nx_pandas/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import importlib.metadata

# nx-pandas has to be installed -- an editable install is enough for local
# development -- so the version is read from the installed distribution
# metadata. Failing here keeps the check strict and the message actionable.
try:
    __version__ = importlib.metadata.version("nx-pandas")
except Exception as exc:
    _message = (
        "`nx_pandas.__version__` not available. This may mean "
        "nx-pandas was incorrectly installed or not installed at all. "
        "For local development, you may want to do an editable install via "
        "`python -m pip install -e path/to/nx-pandas`"
    )
    raise AttributeError(_message) from exc
# Keep the package namespace free of the helper module.
del importlib
221 changes: 221 additions & 0 deletions nx_pandas/_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import pandas as pd


# https://pandas.pydata.org/docs/development/extending.html#registering-custom-accessors
@pd.api.extensions.register_dataframe_accessor("nx")
class NxAccessor:
    """``df.nx`` accessor storing how a DataFrame is interpreted as a graph.

    Attributes
    ----------
    is_directed : bool
        Initialized to ``True``.
    is_multigraph : bool
        Initialized to ``False``.
    source, target : column name or None
        Default to ``"source"`` / ``"target"`` when the DataFrame has columns
        with those names, otherwise ``None``.
    edge_key : column name or None
        Only accessible while ``is_multigraph`` is true. Defaults to
        ``"edge_key"`` when such a column exists, otherwise ``None``.
    node_df
        Initialized to ``None``; not otherwise used by this class.
    graph : dict
        Initialized to an empty dict (``df.nx.graph`` rather than ``df.graph``).
    cache_enabled : bool
        Whether a cache dict is currently allocated.
    """

    def __init__(self, pandas_obj):
        self._df = pandas_obj
        self.is_directed = True
        self.is_multigraph = False
        columns = pandas_obj.columns
        self._source = "source" if "source" in columns else None
        self._target = "target" if "target" in columns else None
        self._edge_key = "edge_key" if "edge_key" in columns else None
        self.node_df = None
        self.graph = {}  # `df.nx.graph` instead of `df.graph`
        self._cache = None  # None means "cache disabled"

    def _validate_column(self, column, attr):
        """Raise ``KeyError`` unless ``column`` is None or a column of the DataFrame.

        ``attr`` is the public attribute name (``"source"``, ``"target"`` or
        ``"edge_key"``) and is only used to build the error message.
        """
        if column is None or column in self._df.columns:
            return
        if attr == "edge_key":
            allowed, graph_kind = "an existing column name or None", "multi-graph"
        else:
            allowed, graph_kind = "an existing column name", "graph"
        raise KeyError(
            f"DataFrame does not have column {column!r}. "
            f"`df.nx.{attr}` must be set to {allowed} "
            f"for the DataFrame to be used as a networkx {graph_kind}."
        )

    def _require_multigraph(self):
        """Raise ``AttributeError`` when ``edge_key`` is used on a non-multigraph."""
        if not self.is_multigraph:
            raise AttributeError("'edge_key' attribute only exists for multigraphs")

    @property
    def source(self):
        """Name of the source column, or None.

        Raises ``KeyError`` if the stored name is no longer a column.
        """
        # Should we raise here to ensure consistency or let users break themselves?
        self._validate_column(self._source, "source")
        return self._source

    @source.setter
    def source(self, val):
        self._validate_column(val, "source")
        self._source = val

    @property
    def target(self):
        """Name of the target column, or None.

        Raises ``KeyError`` if the stored name is no longer a column.
        """
        self._validate_column(self._target, "target")
        return self._target

    @target.setter
    def target(self, val):
        self._validate_column(val, "target")
        self._target = val

    @property
    def edge_key(self):
        """Name of the edge-key column, or None; only available for multigraphs.

        Raises ``AttributeError`` when ``is_multigraph`` is false and
        ``KeyError`` if the stored name is no longer a column.
        """
        self._require_multigraph()
        self._validate_column(self._edge_key, "edge_key")
        return self._edge_key

    @edge_key.setter
    def edge_key(self, val):
        self._require_multigraph()
        self._validate_column(val, "edge_key")
        self._edge_key = val

    @property
    def cache_enabled(self):
        """True when a cache dict is allocated."""
        return self._cache is not None

    @cache_enabled.setter
    def cache_enabled(self, val):
        if not val:
            # Wipe out the cache when disabling the cache
            self._cache = None
        elif self._cache is None:
            # Enable cache if necessary; an existing cache is left untouched
            self._cache = {}

    def __dir__(self):
        """List attributes, hiding ``edge_key`` unless this is a multigraph."""
        attrs = super().__dir__()
        if self.is_multigraph:
            return attrs
        return [name for name in attrs if name != "edge_key"]

    def set_properties(
        self,
        *,
        source=None,
        target=None,
        edge_key=None,
        is_directed=None,
        is_multigraph=None,
        cache_enabled=None,
    ):
        """Set many graph properties (i.e., ``df.nx`` attributes) at once.

        Return the original DataFrame to allow method chaining. For example::

            >>> df = pd.read_csv("my_data.csv").nx.set_properties(
            ...     is_directed=False
            ... )  # doctest: +SKIP

        This is a bulk transaction, so either all given attributes will be
        updated, or nothing will be set if there was an exception.

        Parameters
        ----------
        source, target, edge_key, is_directed, is_multigraph, cache_enabled
            New values for the attributes of the same name. ``None`` (the
            default) leaves the attribute unchanged.

        Returns
        -------
        The DataFrame this accessor belongs to.

        Raises
        ------
        KeyError
            If ``source``, ``target`` or ``edge_key`` is not a column name.
        AttributeError
            If ``edge_key`` is given while the graph is not a multigraph.
        """
        # `previous` records the state to roll back to. Column attributes are
        # recorded under their private names so the rollback bypasses the
        # validating setters. `updates` is applied in insertion order:
        # `is_multigraph` comes before `edge_key` because the `edge_key`
        # setter requires a multigraph.
        previous = {}
        updates = {}
        if source is not None:
            previous["_source"] = self._source
            updates["source"] = source
        if target is not None:
            previous["_target"] = self._target
            updates["target"] = target
        if is_directed is not None:
            previous["is_directed"] = self.is_directed
            updates["is_directed"] = is_directed
        if is_multigraph is not None:
            previous["is_multigraph"] = self.is_multigraph
            updates["is_multigraph"] = is_multigraph
        if edge_key is not None:
            previous["_edge_key"] = self._edge_key
            updates["edge_key"] = edge_key
        if cache_enabled is not None:
            previous["cache_enabled"] = self.cache_enabled
            updates["cache_enabled"] = cache_enabled
        try:
            for attr, val in updates.items():
                setattr(self, attr, val)
        except Exception:
            # Undo everything, including updates applied before the failure.
            for attr, val in previous.items():
                setattr(self, attr, val)
            raise
        return self._df


def _attr_raise_if_invalid_graph(df, attr):
try:
df.nx.source
df.nx.target
if df.nx.is_multigraph:
df.nx.edge_key
except KeyError as exc:
raise AttributeError(
f"{type(df).__name__!r} object has no attribute '{attr}'"
) from exc
if df.nx._source is None:
raise AttributeError(
f"{type(df).__name__!r} object has no attribute '{attr}'.\n\n"
"`df.nx.source` (currently None) must be set to an existing "
"column name for the DataFrame to be used as a networkx graph."
)
if df.nx._target is None:
raise AttributeError(
f"{type(df).__name__!r} object has no attribute '{attr}'.\n\n"
"`df.nx.target` (currently None) must be set to an existing "
"column name for the DataFrame to be used as a networkx graph."
)


def __networkx_backend__(self):
    """Return the backend name ``"pandas"`` when the DataFrame is a valid graph.

    Raises ``AttributeError`` otherwise, so the attribute appears to be
    missing on DataFrames that cannot be used as a graph.
    """
    _attr_raise_if_invalid_graph(self, "__networkx_backend__")
    backend_name = "pandas"
    return backend_name


def __networkx_cache__(self):
    """Return the accessor's cache object when the DataFrame is a valid graph.

    The value is ``self.nx._cache``, which is None while caching is disabled.
    Raises ``AttributeError`` when the DataFrame cannot be used as a graph.
    """
    _attr_raise_if_invalid_graph(self, "__networkx_cache__")
    accessor = self.nx
    return accessor._cache


def is_directed(self):
    """Returns True if graph is directed, False otherwise.

    The answer is read from ``self.nx.is_directed``.
    """
    accessor = self.nx
    return accessor.is_directed


def is_directed_property(self):
    """Getter that returns ``is_directed`` bound to this DataFrame.

    The result is a bound method, so calling it (``df.is_directed()``) returns
    True if the graph is directed, False otherwise. Raises ``AttributeError``
    when the DataFrame cannot be used as a graph, so that `df.is_directed` is
    only available if `df` is a valid graph.
    """
    _attr_raise_if_invalid_graph(self, "is_directed")
    bound_method = is_directed.__get__(self)
    return bound_method


def is_multigraph(self):
    """Returns True if graph is a multigraph, False otherwise.

    The answer is read from ``self.nx.is_multigraph``.
    """
    accessor = self.nx
    return accessor.is_multigraph


def is_multigraph_property(self):
    """Getter that returns ``is_multigraph`` bound to this DataFrame.

    The result is a bound method, so calling it (``df.is_multigraph()``)
    returns True if the graph is a multigraph, False otherwise. Raises
    ``AttributeError`` when the DataFrame cannot be used as a graph, so that
    `df.is_multigraph` is only available if `df` is a valid graph.
    """
    _attr_raise_if_invalid_graph(self, "is_multigraph")
    bound_method = is_multigraph.__get__(self)
    return bound_method


# Attach the getters above to `pd.DataFrame` as read-only properties. Each
# getter raises AttributeError for DataFrames that are not valid graphs, so
# the attributes only appear to exist on DataFrames usable as graphs.
pd.DataFrame.__networkx_backend__ = property(__networkx_backend__)
pd.DataFrame.__networkx_cache__ = property(__networkx_cache__)
# Add `is_directed` and `is_multigraph` so `not_implemented_for` decorator works
# (these properties return bound methods, so `df.is_directed()` is callable)
pd.DataFrame.is_directed = property(is_directed_property)
pd.DataFrame.is_multigraph = property(is_multigraph_property)


def get_info():
    """Return this package's info mapping, which is currently an empty dict."""
    # Should we add config for e.g. default source, target, edge_key columns?
    # Maybe config to enable/disable cache by default?
    info = {}
    return info
Loading

0 comments on commit 0731442

Please sign in to comment.